class LandmarkMethodBase(object):
    def __init__(self,
                 device_id_facedetection,
                 checkpoint_path_face=None,
                 checkpoint_path_landmark=None,
                 model_points_file=None):
        download_external_landmark_models()
        self.model_size_rescale = 16.0
        self.head_pitch = 0.0
        self.interpupillary_distance = 0.058
        self.eye_image_size = (60, 36)

        tqdm.write("Using device {} for face detection.".format(
            device_id_facedetection))

        self.device = device_id_facedetection
        self.face_net = SFDDetector(device=device_id_facedetection,
                                    path_to_detector=checkpoint_path_face)
        self.facial_landmark_nn = self.load_face_landmark_model(
            checkpoint_path_landmark)

        self.model_points = self.get_full_model_points(model_points_file)

    def load_face_landmark_model(self, checkpoint_fp=None):
        import rt_gene.ThreeDDFA.mobilenet_v1 as mobilenet_v1
        if checkpoint_fp is None:
            import rospkg
            checkpoint_fp = rospkg.RosPack().get_path(
                'rt_gene') + '/model_nets/phase1_wpdc_vdc.pth.tar'
        arch = 'mobilenet_1'

        checkpoint = torch.load(
            checkpoint_fp,
            map_location=lambda storage, loc: storage)['state_dict']
        model = getattr(mobilenet_v1, arch)(
            num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)

        model_dict = model.state_dict()
        # The checkpoint was trained with multiple GPUs (DataParallel), so strip the 'module.' prefix from its keys.
        for k in checkpoint.keys():
            model_dict[k.replace('module.', '')] = checkpoint[k]
        model.load_state_dict(model_dict)
        cudnn.benchmark = True
        model = model.to(self.device)
        model.eval()
        return model

    def get_full_model_points(self, model_points_file=None):
        """Get all 68 3D model points from file"""
        raw_value = []
        if model_points_file is None:
            import rospkg
            model_points_file = rospkg.RosPack().get_path(
                'rt_gene') + '/model_nets/face_model_68.txt'

        with open(model_points_file) as f:
            for line in f:
                raw_value.append(line)
        model_points = np.array(raw_value, dtype=np.float32)
        model_points = np.reshape(model_points, (3, -1)).T

        # Scale the generic face model to metric units using the interpupillary distance and the rescale factor.
        model_points = model_points * (self.interpupillary_distance *
                                       self.model_size_rescale)

        return model_points

    def change_to_prevIndex(self, boxes, prev_boxes):
        """Reorder `boxes` so that each face keeps the index it had in `prev_boxes`
        (matched by the closest left x-coordinate), update `prev_boxes` in place, and
        return the indices of previous boxes that no longer have a match."""
        del_idx = []
        copy_boxes = np.copy(boxes)
        result_idx = []
        if prev_boxes[0][0] != 0:
            # For each current box, find the previous box with the closest left x-coordinate.
            for b in range(len(boxes)):
                pb_idx = b
                min_diff = sys.maxsize
                for pb in range(len(prev_boxes)):
                    diff = abs(boxes[b][0] - prev_boxes[pb][0])
                    if diff < min_diff:
                        min_diff = diff
                        pb_idx = pb
                result_idx.append(pb_idx)
            # If faces disappeared, mark the unmatched previous indices for deletion
            # and shift the remaining matched indices down accordingly.
            if (len(boxes) < len(prev_boxes)) and len(result_idx) != 0:
                for idx in range(len(prev_boxes) - 1, -1, -1):
                    if idx not in result_idx:
                        del_idx.append(idx)
                        print("idx is not in result_idx:", "idx:", idx,
                              "result_idx:", result_idx)
                        for i in range(len(result_idx)):
                            if result_idx[i] > idx:
                                result_idx[i] -= 1

            # Rearrange the current boxes into the previous frame's ordering.
            for i in range(len(boxes)):
                boxes[result_idx[i]] = copy_boxes[i]

        # Update prev_boxes in place with the (reordered) current boxes.
        for i in range(len(boxes)):
            if i > len(prev_boxes) - 1:
                prev_boxes.append(boxes[i])
            else:
                prev_boxes[i] = boxes[i]
        return del_idx

    def get_init_value(self, image):
        """Detect faces once and return (number of faces, list of facebox left x-coordinates)."""
        faceboxes = self.get_face_bb(image)
        bbox_l = []
        for facebox in faceboxes:
            bbox_l.append(facebox[0])

        return len(faceboxes), bbox_l

    def get_face_bb(self, image):
        faceboxes = []
        fraction = 4.0
        image = cv2.resize(image, (0, 0), fx=1.0 / fraction, fy=1.0 / fraction)
        detections = self.face_net.detect_from_image(image)

        for result in detections:
            # scale back up to image size
            box = result[:4]
            confidence = result[4]

            if gaze_tools.box_in_image(box, image) and confidence > 0.6:
                box = [x * fraction for x in box]  # scale back up
                diff_height_width = (box[3] - box[1]) - (box[2] - box[0])
                offset_y = int(abs(diff_height_width / 2))
                box_moved = gaze_tools.move_box(box, [0, offset_y])

                # Make box square.
                facebox = gaze_tools.get_square_box(box_moved)
                faceboxes.append(facebox)

        return faceboxes

    @staticmethod
    def visualize_headpose_result(img,
                                  facebox,
                                  est_headpose,
                                  people,
                                  GTlabel=[]):
        """Overlay the estimated head pose, est_headpose = (phi_head, theta_head),
        on the original image as an arrow drawn from the centre of the face box."""
        output_image = np.copy(img)
        box_point = np.copy(facebox)

        center_x = (box_point[2] + box_point[0]) / 2
        center_y = (box_point[3] + box_point[1]) / 2
        endpoint_x, endpoint_y = gaze_tools.get_endpoint(
            est_headpose[1], est_headpose[0], center_x, center_y, 100)

        # flip vector
        #flip_endpoint_x, flip_endpoint_y = gaze_tools.get_endpoint(est_headpose[1], -est_headpose[0], center_x, center_y, 100)
        # cv2.arrowedLine(output_image, (int(center_x), int(center_y)), (int(flip_endpoint_x), int(flip_endpoint_y)), (0, 0, 255),2)

        headpose_error = 0
        if GTlabel != []:
            GT_endpoint_x, GT_endpoint_y = gaze_tools.get_endpoint(
                GTlabel[2], GTlabel[1], center_x, center_y, 100)
            headpose_error = gaze_tools.get_error(
                [GT_endpoint_x, GT_endpoint_y], [endpoint_x, endpoint_y],
                [center_x, center_y])

        #Smoothing
        # endpoint_x ,endpoint_y =people.getEndpointAverage(endpoint_x, endpoint_y, "headpose")

        cv2.arrowedLine(output_image, (int(center_x), int(center_y)),
                        (int(endpoint_x), int(endpoint_y)), (0, 0, 255), 2)

        return output_image, headpose_error

    # def visualize_headpose_result(face_image, est_headpose):
    #     """Here, we take the original eye eye_image and overlay the estimated headpose."""
    #     output_image = np.copy(face_image)
    #
    #     center_x = output_image.shape[1] / 2
    #     center_y = output_image.shape[0] / 2
    #
    #     endpoint_x, endpoint_y = gaze_tools.get_endpoint(est_headpose[1], est_headpose[0], center_x, center_y, 100)
    #
    #     cv2.arrowedLine(output_image, (int(center_x), int(center_y)), (int(endpoint_x), int(endpoint_y)), (0, 0, 255),
    #                     2)
    #     return output_image

    def ddfa_forward_pass(self, color_img, roi_box_list):
        # Crop the image at each ROI box -> list of cropped face images, each resized to 120x120.
        img_step = [crop_img(color_img, roi_box) for roi_box in roi_box_list]
        img_step = [
            cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
            for img in img_step
        ]

        _input = torch.cat(
            [facial_landmark_transform(img).unsqueeze(0) for img in img_step],
            0)
        with torch.no_grad():
            _input = _input.to(self.device)
            # Forward pass through the landmark network; returns the 62 3DMM parameters per face.
            param = self.facial_landmark_nn(_input).cpu().numpy().astype(
                np.float32)

        return [
            predict_68pts(p.flatten(), roi_box)
            for p, roi_box in zip(param, roi_box_list)
        ]

    def get_subjects_from_faceboxes(self, color_img, faceboxes):
        # Crop each detected face from the full image.
        face_images = [
            gaze_tools.crop_face_from_image(color_img, b) for b in faceboxes
        ]
        subjects = []

        # Each ROI box is a 4-element list [xmin, ymin, xmax, ymax] derived from the corresponding facebox.
        roi_box_list = [
            parse_roi_box_from_bbox(facebox) for facebox in faceboxes
        ]

        initial_pts68_list = self.ddfa_forward_pass(color_img, roi_box_list)
        roi_box_refined_list = [
            parse_roi_box_from_landmark(initial_pts68)
            for initial_pts68 in initial_pts68_list
        ]
        # Second forward pass on the refined ROI boxes gives the final 68 landmark points.
        pts68_list = self.ddfa_forward_pass(color_img, roi_box_refined_list)

        # Note: face_image was changed to color_img.
        for pts68, face_image, facebox in zip(pts68_list, face_images,
                                              faceboxes):
            np_landmarks = np.array((pts68[0], pts68[1])).T
            subjects.append(
                TrackedSubject(np.array(facebox), face_image, np_landmarks))
        return subjects
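

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original class).
# Assumptions: OpenCV can open camera 0, the default rt_gene model files are
# reachable via rospkg (otherwise pass explicit checkpoint/model paths), the
# device id "cuda:0" is available (use "cpu" otherwise), and TrackedSubject
# exposes the landmarks passed to its constructor as `.landmarks`.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import cv2

    landmark_estimator = LandmarkMethodBase(device_id_facedetection="cuda:0")

    capture = cv2.VideoCapture(0)
    prev_boxes = [[0]]  # sentinel: change_to_prevIndex skips matching on the first frame
    while True:
        ret, frame = capture.read()
        if not ret:
            break

        faceboxes = landmark_estimator.get_face_bb(frame)
        if not faceboxes:
            continue

        # Keep face indices consistent with the previous frame.
        landmark_estimator.change_to_prevIndex(faceboxes, prev_boxes)

        # Fit 68 landmarks for every detected face and draw them.
        subjects = landmark_estimator.get_subjects_from_faceboxes(frame, faceboxes)
        for subject in subjects:
            for x, y in subject.landmarks:  # assumed attribute name
                cv2.circle(frame, (int(x), int(y)), 2, (0, 255, 0), -1)

        cv2.imshow("landmarks", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    capture.release()
    cv2.destroyAllWindows()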