def get_IOU(img_original, bboxes, segms, six_dof, car_id2name, car_model_dict,
            unique_car_mode, camera_matrix):
    """Append a mask-vs-mesh IoU score to every detection.

    For each detection the predicted instance mask (decoded from ``segms``)
    is compared with the silhouette obtained by posing the predicted car
    model with the predicted rotation/translation and projecting it with
    ``camera_matrix``.

    Args:
        img_original (ndarray): Image indexed as ``[row, col, channel]``.
            Only its shape below row 1480 is used (the masks are expected
            to live in that cropped frame).
        bboxes (ndarray): 2-D array with one row per detection.
        segms (sequence): Encoded masks, one per detection, in the format
            accepted by ``maskUtils.decode``.
        six_dof (dict): Must provide ``'quaternion_pred'``,
            ``'car_cls_score_pred'`` and ``'trans_pred_world'``, each
            indexed per detection.
        car_id2name: Maps a car id to an object exposing ``.name``.
        car_model_dict (dict): Maps a car name to a dict with ``'vertices'``
            and ``'faces'`` (face indices are shifted by -1 before use).
        unique_car_mode: Maps a predicted class index to a car id.
        camera_matrix (ndarray): Matrix applied to the posed vertices; the
            resulting pixel rows are shifted by -1480 afterwards, i.e. the
            projection happens in un-cropped image coordinates.

    Returns:
        ndarray: Array of shape ``(len(bboxes), bboxes.shape[1] + 1)`` and
        dtype ``bboxes.dtype``: every input row followed by its IoU. The
        IoU is 0 when both masks are empty.
    """
    crop_top = 1480  # rows above this are cut away before segmentation
    # Only the shape of the cropped image is needed, so avoid copying it.
    mask_shape = img_original[crop_top:, :, :].shape[:-1]
    bboxes_with_IOU = np.zeros((bboxes.shape[0], bboxes.shape[1] + 1),
                               dtype=bboxes.dtype)

    euler_angles = np.array(
        [quaternion_to_euler_angle(q) for q in six_dof['quaternion_pred']])
    trans_pred_world = six_dof['trans_pred_world']
    car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
    car_names = [car_id2name[unique_car_mode[x]].name for x in car_labels]

    for bbox_idx, box in enumerate(bboxes):
        # `np.bool` was removed in NumPy 1.24; the builtin works everywhere.
        mask_pred = maskUtils.decode(segms[bbox_idx]).astype(bool)

        car_model = car_model_dict[car_names[bbox_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        # The predicted angles are re-ordered and negated before being
        # handed to euler_to_Rot (same convention as the other renderers
        # in this file).
        ea = euler_angles[bbox_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T

        # Homogeneous vertex coordinates, one row per vertex.
        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices

        img_cor_points = np.dot(camera_matrix, np.dot(Rt[:3, :], P.T)).T
        img_cor_points[:, :2] /= img_cor_points[:, 2:3]

        # Truncate to integer pixels first, then move into the cropped frame.
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top

        mask_mesh = np.zeros(mask_shape)
        for tri in triangles:
            cv2.drawContours(mask_mesh, np.int32([pixels[tri[:3]]]), 0, 1,
                             -1)

        intersection_area = np.sum(np.logical_and(mask_pred, mask_mesh))
        union_area = np.sum(np.logical_or(mask_pred, mask_mesh))
        # Both masks empty: report 0 instead of propagating NaN.
        iou_score = intersection_area / union_area if union_area > 0 else 0.0

        bboxes_with_IOU[bbox_idx, :-1] = box
        bboxes_with_IOU[bbox_idx, -1] = iou_score
    return bboxes_with_IOU
def visual_PnP(img, PnP_pred, camera_matrix, vertices, triangles):
    """Draw the wireframe of one mesh on ``img`` for every predicted pose.

    Args:
        img (ndarray): Image that is drawn on in place.
        PnP_pred (sequence[dict]): One entry per car, each providing the
            keys ``'x'``, ``'y'``, ``'z'``, ``'yaw'``, ``'pitch'`` and
            ``'roll'``.
        camera_matrix (ndarray): Matrix applied to the posed vertices to get
            homogeneous pixel coordinates.
        vertices (ndarray): Mesh vertices, one row per vertex.
        triangles (iterable): Faces, each giving three row indices into
            ``vertices``.

    Returns:
        ndarray: The very ``img`` object that was passed in.
    """
    for pose in PnP_pred:
        translation = pose['x'], pose['y'], pose['z']
        rotation = euler_to_Rot(pose['yaw'], pose['pitch'], pose['roll']).T

        extrinsic = np.eye(4)
        extrinsic[:3, 3] = translation
        extrinsic[:3, :3] = rotation

        # Homogeneous vertex coordinates, one row per vertex.
        homogeneous = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        homogeneous[:, :-1] = vertices

        projected = np.dot(camera_matrix,
                           np.dot(extrinsic[:3, :], homogeneous.T)).T
        # Perspective division of the x and y columns by depth.
        projected[:, :2] /= projected[:, 2:3]

        # One random colour per car.
        rgb = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        line_color = tuple([int(channel) for channel in rgb[0]])

        pixels = projected[:, :2].astype(np.int32)
        for face in triangles:
            outline = pixels[[face[0], face[1], face[2]]]
            cv2.polylines(img, np.int32([outline]), 1, color=line_color)

    return img
def get_iou_score(bbox_idx, car_model_dict, camera_matrix, class_names,
                  mask_all_pred, mask_all_mesh, mask_all_pred_area,
                  euler_angle, t):
    """Score how well one posed car mesh overlaps a predicted mask.

    The car model selected by ``class_names[bbox_idx]`` is posed with
    ``euler_angle[bbox_idx]`` and ``t``, projected with ``camera_matrix`` and
    filled into a copy of ``mask_all_mesh`` (the input is left untouched).

    Args:
        bbox_idx (int): Index into ``class_names`` and ``euler_angle``.
        car_model_dict (dict): Maps a car name to a dict with ``'vertices'``
            and ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the posed vertices; pixel
            rows are shifted by -1480 after projection.
        class_names (sequence): Car name per detection.
        mask_all_pred (ndarray): Predicted mask to compare against.
        mask_all_mesh (ndarray): Canvas the mesh silhouette is drawn onto
            (copied first).
        mask_all_pred_area: Denominator of the first returned ratio.
        euler_angle (sequence): Per-detection angle triples.
        t: Translation written into the last column of the pose matrix.

    Returns:
        tuple: ``(intersection / mask_all_pred_area, intersection / union)``.
    """
    car_model = car_model_dict[class_names[bbox_idx]]
    mesh_vertices = np.array(car_model['vertices'])
    mesh_vertices[:, 1] = -mesh_vertices[:, 1]
    mesh_faces = np.array(car_model['faces']) - 1

    # Angles are re-ordered and negated before building the rotation.
    angles = euler_angle[bbox_idx]
    extrinsic = np.eye(4)
    extrinsic[:3, 3] = t
    extrinsic[:3, :3] = euler_to_Rot(-angles[1], -angles[0], -angles[2]).T

    homogeneous = np.ones(
        (mesh_vertices.shape[0], mesh_vertices.shape[1] + 1))
    homogeneous[:, :-1] = mesh_vertices

    projected = np.dot(camera_matrix,
                       np.dot(extrinsic[:3, :], homogeneous.T)).T
    projected[:, :2] /= projected[:, 2:3]

    # Integer pixel positions, shifted into the cropped frame.
    pixels = projected[:, :2].astype(np.int32)
    pixels[:, 1] -= 1480

    rendered = mask_all_mesh.copy()
    for face in mesh_faces:
        cv2.drawContours(rendered, np.int32([pixels[face[:3]]]), 0, 1, -1)

    overlap = np.sum(mask_all_pred * rendered)
    combined = np.sum(np.logical_or(mask_all_pred, rendered))
    return overlap / mask_all_pred_area, overlap / combined
def finetune_RT(
        output,
        dataset,
        loss_grayscale_light=0.05,
        loss_grayscale_RT=0.05,
        loss_IoU=0.9,
        num_epochs=50,
        draw_flag=True,
        lr=0.05,  # lr=0.05,
        conf_thresh=0.8,
        tmp_save_dir='/data/Kaggle/wudi_data/tmp_output/',
        fix_rot=True,
        num_car_for_light_rendering=2):
    """Refine the predicted pose of every confident car in one image.

    Each car whose detection score exceeds ``conf_thresh`` is passed on its
    own to ``get_updated_RT``; the returned translation replaces the entry
    in ``output[2]['trans_pred_world']`` and, unless ``fix_rot`` is set, the
    returned Euler angles are collected as well. The (in-place modified)
    ``output`` is then dumped once to ``<tmp_save_dir>/<image stem>.pkl``.

    :param output: per-image prediction triple ``(bboxes, segms, six_dof)``.
        ``bboxes`` and ``segms`` are indexed by class first (index 2 is
        used); ``six_dof`` is a dict with ``'car_cls_score_pred'``,
        ``'quaternion_pred'``, ``'trans_pred_world'`` and ``'file_name'``.
        It is modified in place.
    :param dataset: provides ``camera_matrix``, ``unique_car_mode`` and
        ``car_model_dict``.
    :param loss_grayscale_light: unused, kept for backward compatibility.
    :param loss_grayscale_RT: unused, kept for backward compatibility.
    :param loss_IoU: forwarded to ``get_updated_RT`` as ``loss_RT``.
    :param num_epochs: forwarded to ``get_updated_RT``.
    :param draw_flag: if True, a per-car ``.gif`` path inside
        ``tmp_save_dir`` is forwarded to ``get_updated_RT``.
    :param lr: forwarded to ``get_updated_RT``.
    :param conf_thresh: cars with a detection score at or below this value
        are left untouched.
    :param tmp_save_dir: directory receiving the ``.pkl`` (and ``.gif``)
        files; it is created if missing.
    :param fix_rot: if True the rotation is kept; otherwise the refined
        angles are stored under ``output[2]['euler_angle']``.
    :param num_car_for_light_rendering: unused, kept for backward
        compatibility.
    :return: None; results are written to disk and into ``output``.
    """
    CAR_IDX = 2
    crop_top = 1480  # only the part of the image below this row is used

    bboxes, segms, six_dof = output[0], output[1], output[2]
    file_name = six_dof['file_name']
    image_stem = file_name.split('/')[-1][:-4]

    quaternion_pred = six_dof['quaternion_pred']
    trans_pred_world = six_dof['trans_pred_world']
    euler_angles = np.array(
        [quaternion_to_euler_angle(x) for x in quaternion_pred])

    # The last bbox column is the detection confidence.
    conf_list = bboxes[CAR_IDX][:, -1] > conf_thresh

    # Create the directory up front: get_updated_RT may write a gif into it,
    # and several images may be processed concurrently.
    os.makedirs(tmp_save_dir, exist_ok=True)

    if np.any(conf_list):
        camera_matrix = dataset.camera_matrix.copy()
        camera_matrix[1, 2] -= crop_top  # Because we have only bottom half

        car_labels = np.argmax(six_dof['car_cls_score_pred'], axis=1)
        car_names = [
            car_id2name[dataset.unique_car_mode[x]].name for x in car_labels
        ]
        grayscale_crop = color.rgb2gray(imread(file_name))[crop_top:, :]

        # Collect mesh, pose and mask for every car in the image.
        vertices_img = []
        faces_img = []
        rotation_img = []
        trans_img = []
        euler_img = []
        mask_img = []
        for car_idx in range(len(quaternion_pred)):
            # The predicted mask serves as the ground-truth silhouette.
            mask_img.append(maskUtils.decode(segms[CAR_IDX][car_idx]))

            car_model = dataset.car_model_dict[car_names[car_idx]]
            vertices = np.array(car_model['vertices'])
            vertices[:, 1] = -vertices[:, 1]
            vertices_img.append(vertices)
            faces_img.append(np.array(car_model['faces']) - 1)

            # Angles are re-ordered and negated for euler_to_Rot.
            ea = euler_angles[car_idx]
            yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
            rotation_img.append(euler_to_Rot(yaw, pitch, roll).T)
            euler_img.append(np.array([yaw, pitch, roll]))
            trans_img.append(trans_pred_world[car_idx])

        Rotation_Matrix_img = np.stack(rotation_img)
        # np.stack copies, so the initial translations stay available even
        # after trans_pred_world is updated in place below.
        T_img = np.stack(trans_img)
        euler_angles_img = np.stack(euler_img)
        mask_img = np.stack(mask_img)

        # Meshes differ in size: vertices are zero-padded to the largest
        # mesh, faces are truncated to the smallest face count (at most
        # 4999).
        max_vertices = max(v.shape[0] for v in vertices_img)
        min_faces = min(4999, min(f.shape[0] for f in faces_img))
        vertices_img_all = np.zeros((len(vertices_img), max_vertices, 3))
        faces_img_all = np.zeros((len(faces_img), min_faces, 3))
        for k, (verts, faces) in enumerate(zip(vertices_img, faces_img)):
            vertices_img_all[k, :verts.shape[0], :] = verts
            faces_img_all[k, :, :] = faces[:min_faces, :]

        # Now we start to fine tune R, T, one confident car at a time.
        for i, true_flag in enumerate(conf_list):
            if not true_flag:
                continue

            output_gif = None
            if draw_flag:
                output_gif = tmp_save_dir + '/' + image_stem + '_' + str(
                    i) + '.gif'

            T_update, ea_update = get_updated_RT(
                vertices=vertices_img_all[None, i],
                faces=faces_img_all[None, i],
                Rotation_Matrix=Rotation_Matrix_img[None, i],
                T=T_img[None, i],
                euler_angle=euler_angles_img[i],
                mask_full_size=mask_img[None, i],
                masked_grayscale_img=mask_img[i] * grayscale_crop,
                camera_matrix=camera_matrix,
                image_size=(3384, 2710 - crop_top),
                loss_RT=loss_IoU,
                num_epochs=num_epochs,
                draw_flag=draw_flag,
                output_gif=output_gif,
                lr=lr,
                fix_rot=fix_rot)

            six_dof['trans_pred_world'][i] = T_update
            if not fix_rot:
                # Undo the re-ordering/negation applied before refinement.
                euler_angles[i] = (-ea_update[1], -ea_update[0],
                                   -ea_update[2])

    if not fix_rot:
        six_dof['euler_angle'] = euler_angles

    # Dump once per image (also when nothing was refined) so that every
    # processed image has a result file.
    output_name = tmp_save_dir + '/' + image_stem + '.pkl'
    mmcv.dump(output, output_name)
    def load_anno_idx(
        self,
        idx,
        img_concat,
        train,
        draw_dir='/data/home/yyj/code/kaggle/new_code/Kaggle_PKU_Baidu/data/pku_data/crop_visualization/crop_mesh'
    ):
        """Blend the annotated car meshes of one sample over three images.

        The annotation string of row ``idx`` is parsed with
        ``self._str2coords``; every car in it is posed, projected with
        ``self.camera_matrix`` and filled into a common mask. That mask is
        blended onto each of the three images, which are written to
        ``draw_dir`` as ``<ImageId>_1.jpg``, ``_2.jpg`` and ``_3.jpg``.

        Args:
            idx (int): Row position used with ``.iloc`` on ``train``.
            img_concat (sequence): Exactly three images; the mask takes the
                shape of the first one.
            train: Table whose ``'PredictionString'`` and ``'ImageId'``
                columns support ``.iloc`` (presumably a pandas DataFrame --
                confirm against the caller).
            draw_dir (str): Directory the blended images are written to.

        Returns:
            None.
        """
        img1, img2, img3 = img_concat
        mask_all = np.zeros(img1.shape)
        alpha = 0.8  # transparency

        gt = self._str2coords(train['PredictionString'].iloc[idx])
        for gt_pred in gt:
            # rendering the car according to:
            # https://www.kaggle.com/ebouteillon/augmented-reality

            # car_id2name is from:
            # https://github.com/ApolloScapeAuto/dataset-api/blob/master/car_instance/car_models.py
            car_name = car_id2name[gt_pred['id']].name
            car_model = self.car_model_dict[car_name]
            vertices = np.array(car_model['vertices'])
            vertices[:, 1] = -vertices[:, 1]
            triangles = np.array(car_model['faces']) - 1

            # Pitch and yaw are exchanged and all angles negated before
            # building the rotation.
            yaw = -gt_pred['pitch']
            pitch = -gt_pred['yaw']
            roll = -gt_pred['roll']
            Rt = np.eye(4)
            Rt[:3, 3] = np.array([gt_pred['x'], gt_pred['y'], gt_pred['z']])
            Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T

            # Homogeneous vertex coordinates, one row per vertex.
            P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
            P[:, :-1] = vertices

            # project 3D points to 2d image plane
            img_cor_points = np.dot(self.camera_matrix,
                                    np.dot(Rt[:3, :], P.T)).T
            img_cor_points[:, :2] /= img_cor_points[:, 2:3]
            pixels = img_cor_points[:, :2].astype(np.int32)

            # This will draw the mask for segmentation
            mask_seg = np.zeros(img1.shape, dtype=np.uint8)
            for tri in triangles:
                cv2.drawContours(mask_seg, np.int32([pixels[tri[:3]]]), 0,
                                 (0, 0, 255), -1)

            mask_all += mask_seg

        # Rescale to the 0..255 range; skip when nothing was drawn so the
        # mask is not turned into NaNs by a division by zero.
        peak = mask_all.max()
        if peak > 0:
            mask_all = mask_all * 255 / peak
        overlay = mask_all.astype(np.uint8)

        image_id = train['ImageId'].iloc[idx]
        for suffix, image in (('_1.jpg', img1), ('_2.jpg', img2),
                              ('_3.jpg', img3)):
            merged_image = image.copy()
            cv2.addWeighted(image.astype(np.uint8), 1.0, overlay, alpha, 0,
                            merged_image)
            imwrite(merged_image, os.path.join(draw_dir, image_id + suffix))
def imdraw_det_bboxes(img,
                      bboxes,
                      class_names,
                      car_model_dict,
                      camera_matrix,
                      trans_pred_world,
                      euler_angle,
                      color_lists,
                      score_thr=0,
                      bbox_color='green',
                      text_color='green',
                      thickness=1,
                      font_scale=0.5):
    """Draw boxes, labels and projected car wireframes on a copy of ``img``.

    Everything is drawn on the part of the image below row 1480; box
    coordinates are used as given inside that crop, while projected mesh
    rows are shifted by -1480 after projection.

    Args:
        img (ndarray): Image indexed as ``[row, col, channel]``; it is not
            modified.
        bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
            (n, 5).
        class_names (sequence[str]): Car name per detection; used as label
            text and as key into ``car_model_dict``.
        car_model_dict (dict): Maps a car name to a dict with ``'vertices'``
            and ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the posed vertices.
        trans_pred_world (ndarray): Translation per detection.
        euler_angle (ndarray): Angle triple per detection.
        color_lists (sequence): Per detection, a sequence whose first item
            is the wireframe colour.
        score_thr (float): Minimum score of bboxes to be shown; requires
            (n, 5) boxes when positive.
        bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of the box lines.
        font_scale (float): Font scale of texts.

    Returns:
        ndarray: A new image of the same shape as ``img`` with the drawings.
    """
    assert bboxes.ndim == 2
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5
    crop_top = 1480
    img_original = img.copy()
    # View into the private copy: drawing on `img` updates `img_original`.
    img = img_original[crop_top:, :, :]

    if score_thr > 0:
        assert bboxes.shape[1] == 5
        inds = bboxes[:, -1] > score_thr
        # Names and colours are looked up by detection index, so they must
        # be filtered together with the boxes. Sequences that are already
        # shorter/longer than the unfiltered boxes are left alone so that
        # callers passing pre-filtered lists keep working.
        if len(class_names) == len(inds):
            class_names = [
                name for name, keep in zip(class_names, inds) if keep
            ]
        if len(color_lists) == len(inds):
            color_lists = [
                colors for colors, keep in zip(color_lists, inds) if keep
            ]
        bboxes = bboxes[inds, :]
        trans_pred_world = trans_pred_world[inds, :]
        euler_angle = euler_angle[inds, :]
    assert len(bboxes) == len(trans_pred_world) == len(euler_angle)
    bbox_color = color_val(bbox_color)
    text_color = color_val(text_color)

    for bbox_idx, bbox in enumerate(bboxes):
        car_name = class_names[bbox_idx]
        label_text = str(car_name)
        # Plain Python ints keep OpenCV's point parsing happy.
        x1, y1, x2, y2 = (int(v) for v in bbox[:4].astype(np.int32))
        cv2.rectangle(img, (x1, y1), (x2, y2), bbox_color,
                      thickness=thickness)
        if len(bbox) > 4:
            label_text += '|{:.02f}'.format(bbox[-1])
        cv2.putText(img, label_text, (x1, y1 - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)

        # now we draw mesh
        car_model = car_model_dict[car_name]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        # Angles are re-ordered and negated before building the rotation.
        ea = euler_angle[bbox_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T

        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices

        img_cor_points = np.dot(camera_matrix, np.dot(Rt[:3, :], P.T)).T
        img_cor_points[:, :2] /= img_cor_points[:, 2:3]

        # Integer pixel positions, shifted into the cropped frame.
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top

        color_mesh = np.int32(color_lists[bbox_idx][0])
        color_tuple = tuple([int(x) for x in color_mesh])
        for tri in triangles:
            cv2.polylines(img, np.int32([pixels[tri[:3]]]), 1,
                          color=color_tuple)

    # `img` is a view of `img_original`, so the copy already holds the
    # complete result.
    return img_original
def draw_box_mesh_kaggle_pku(
    img_original,
    bboxes,
    segms,
    class_names,
    car_model_dict,
    camera_matrix,
    trans_pred_world,
    euler_angle,
    score_thr=0.8,
    thickness=1,
    transparency=0.5,
    font_scale=0.8,
):
    """Draw boxes and car wireframes and report low mask-vs-mesh IoU.

    For every detection above ``score_thr`` the posed car mesh is projected,
    drawn as a wireframe in a random colour and compared (IoU) with the
    predicted mask. The box is labelled with ``"<iou> <score>"``. Drawing
    happens on the part of the image below row 1480.

    Args:
        img_original (ndarray): Image indexed as ``[row, col, channel]``;
            it is not modified.
        bboxes (ndarray): One row per detection; the last column is the
            score.
        segms (sequence): Encoded masks, one per detection, in the format
            accepted by ``maskUtils.decode``.
        class_names (sequence[str]): Car name per detection, used as key
            into ``car_model_dict``.
        car_model_dict (dict): Maps a car name to a dict with ``'vertices'``
            and ``'faces'`` (face indices are shifted by -1 before use).
        camera_matrix (ndarray): Matrix applied to the posed vertices; pixel
            rows are shifted by -1480 after projection.
        trans_pred_world (ndarray): Translation per detection.
        euler_angle (ndarray): Angle triple per detection.
        score_thr (float): Detections with a score at or below this value
            are skipped (no filtering when not positive).
        thickness (int): Thickness of the box lines.
        transparency (float): Unused; kept for backward compatibility.
        font_scale (float): Font scale of the label text.

    Returns:
        tuple: ``(image, iou_flag)`` where ``image`` is a new array of the
        same shape as ``img_original`` and ``iou_flag`` is True if any drawn
        detection has an IoU below 0.5.
    """
    crop_top = 1480
    img = img_original[crop_top:, :, :].copy()  ## crop half

    iou_flag = False
    if score_thr > 0:
        inds = bboxes[:, -1] > score_thr
        bboxes = bboxes[inds, :]
        segms = np.array(segms)[inds]
        trans_pred_world = trans_pred_world[inds, :]
        euler_angle = euler_angle[inds, :]
        # asarray lets plain lists be filtered with the boolean mask too.
        class_names = np.asarray(class_names)[inds]

    mask_shape = img.shape[:-1]
    for bbox_idx, bbox in enumerate(bboxes):
        color_ndarray = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
        color = tuple([int(c) for c in color_ndarray[0]])

        # `np.bool` was removed in NumPy 1.24; the builtin works everywhere.
        mask_pred = maskUtils.decode(segms[bbox_idx]).astype(bool)

        ## time to draw mesh
        car_model = car_model_dict[class_names[bbox_idx]]
        vertices = np.array(car_model['vertices'])
        vertices[:, 1] = -vertices[:, 1]
        triangles = np.array(car_model['faces']) - 1

        # Angles are re-ordered and negated before building the rotation.
        ea = euler_angle[bbox_idx]
        yaw, pitch, roll = -ea[1], -ea[0], -ea[2]
        Rt = np.eye(4)
        Rt[:3, 3] = trans_pred_world[bbox_idx]
        Rt[:3, :3] = euler_to_Rot(yaw, pitch, roll).T

        P = np.ones((vertices.shape[0], vertices.shape[1] + 1))
        P[:, :-1] = vertices

        img_cor_points = np.dot(camera_matrix, np.dot(Rt[:3, :], P.T)).T
        img_cor_points[:, :2] /= img_cor_points[:, 2:3]

        # Integer pixel positions, shifted into the cropped frame.
        pixels = img_cor_points[:, :2].astype(np.int32)
        pixels[:, 1] -= crop_top

        mask_mesh = np.zeros(mask_shape)
        for tri in triangles:
            polygon = np.int32([pixels[tri[:3]]])
            cv2.polylines(img, polygon, 1, color, thickness=1)
            cv2.drawContours(mask_mesh, polygon, 0, 1, -1)

        intersection_area = np.sum(np.logical_and(mask_pred, mask_mesh))
        union_area = np.sum(np.logical_or(mask_pred, mask_mesh))
        # Both masks empty: treat as no overlap instead of producing NaN.
        if union_area > 0:
            iou_score = round(intersection_area / union_area, 3)
        else:
            iou_score = 0.0
        cls_score = bbox[-1]

        if iou_score < 0.5:
            print('iou_score', iou_score, cls_score)
            iou_flag = True

        label_text_t = str(iou_score) + ' ' + str(cls_score)
        # Plain Python ints keep OpenCV's point parsing happy.
        x1, y1, x2, y2 = (int(v) for v in bbox[:4].astype(np.int32))
        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness=thickness)
        cv2.putText(img, label_text_t, (x1, y1 - 2), cv2.FONT_ITALIC,
                    font_scale, color)

    im_combime = img_original.copy()
    im_combime[crop_top:, :, :] = img
    return im_combime, iou_flag