Example #1
        f.write(' {:.2f}'.format(ann['alpha']))
        for v in bbox:
          f.write(' {:.2f}'.format(v))
        for v in dim:
          f.write(' {:.2f}'.format(v))
        for v in location:
          f.write(' {:.2f}'.format(v))
        f.write(' {:.2f}'.format(rotation_y))
        f.write('\n')


        if DEBUG and tmp[0] != 'DontCare':
          box_3d = compute_box_3d(dim, location, rotation_y)
          box_2d = project_to_image(box_3d, calib)
          # print('box_2d', box_2d)
          image = draw_box_3d(image, box_2d)
          image = draw_box_2d(image, bbox_crop)
          x = (bbox[0] + bbox[2]) / 2
          '''
          print('rot_y, alpha2rot_y, dlt', tmp[0], 
                rotation_y, alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0]),
                np.cos(
                  rotation_y - alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0])))
          '''
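          # Sanity check: unproject the 2D box center at the annotated depth, shift
          # down by half the box height, and compare with the labeled 3D location.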
          depth = np.array([location[2]], dtype=np.float32)
          pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                            dtype=np.float32)
          pt_3d = unproject_2d_to_3d(pt_2d, depth, calib)
          pt_3d[1] += dim[0] / 2
          print('pt_3d', pt_3d)
          print('location', location)
Example #2
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{
            "name": CATS[i],
            "id": i + 1
        } for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if (split not in ["mini", "test"]
                    and scene_name not in SCENE_SPLITS[split]):
                continue
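            # A sample with no "prev" token is the first frame of a scene: start a
            # new video entry and reset per-sensor frame ids and track ids.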
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
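                    # transform_matrix builds the ego->global (pose_record) and
                    # sensor->ego (cs_record) transforms; their product trans_matrix
                    # maps points from the camera frame to the global frame.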
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
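                    # Build the 3x4 camera projection matrix: intrinsics in the left
                    # 3x3 block and a zero translation column.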
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
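                        # Heading: rotate the box's forward axis into camera
                        # coordinates and take the yaw around the camera y-axis; the
                        # translate() call then shifts the center down by half the
                        # height to the bottom face (KITTI-style location).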
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
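                        # amodel_center is the projection of the geometric 3D box
                        # center (undoing the bottom-face shift); for truncated
                        # objects it may fall outside the 2D box.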
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if instance_token not in track_ids:
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
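                        # Rotate the global-frame velocity into the camera frame via
                        # the inverse sensor-to-global transform; w = 0 drops the
                        # translation component.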
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id": num_anns,
                            "image_id": num_images,
                            "category_id": category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location": [box.center[0], box.center[1], box.center[2]],
                            "depth": box.center[2],
                            "occluded": 0,
                            "truncated": 0,
                            "rotation_y": yaw,
                            "amodel_center": amodel_center,
                            "iscrowd": 0,
                            "track_id": track_id,
                            "attributes": ATTRIBUTE_TO_ID[att],
                            "velocity": vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
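                        # alpha is the KITTI observation angle: rotation_y minus the
                        # viewing-ray angle arctan((u - cx) / fx) of the 2D box center.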
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Drop fully occluded annotations: a box is removed when another
                    # box is closer in depth and its 2D bbox encloses this one.
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if (anns[i]["depth"] - min(anns[i]["dim"]) / 2 >
                                    anns[j]["depth"] + max(anns[j]["dim"]) / 2
                                    and _bbox_inside(anns[i]["bbox"],
                                                     anns[j]["bbox"])):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
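        # Group images by (video, sensor) so each camera stream stays contiguous
        # after sorting; the factor 20 assumes fewer than 20 sensor ids.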
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        with open(out_path, "w") as f:
            json.dump(ret, f)
Example #3
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if split not in ['mini', 'test'] and \
         scene_name not in SCENE_SPLITS[split]:
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Complex coordinate transform. This will take time to understand.
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
          trans_matrix = np.dot(global_from_car, car_from_sensor)
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if instance_token not in track_ids:
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Drop fully occluded annotations: a box is removed when another box is
          # closer in depth and its 2D bbox encloses this one.
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])
            else:
              pass

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    with open(out_path, 'w') as f:
      json.dump(ret, f)
Example #4
            if xmin_projected < 0:
                xmin_projected = 0
            if ymin_projected < 0:
                ymin_projected = 0
            if xmax_projected > img_w - 1:
                xmax_projected = img_w - 1
            if ymax_projected > img_h - 1:
                ymax_projected = img_h - 1
            projected_box_2d = [xmin_projected, ymin_projected, xmax_projected, ymax_projected]
            draw_2d_box(img, xmin_projected, ymin_projected, xmax_projected, ymax_projected, c=(0, 255, 255))

            gt_label_list[3] = projected_box_2d
            gt_dict_per_img[cat_id].append(gt_label_list)
            count_gt_dict_per_cat[cat_id] += 1

            img = draw_box_3d(img, box_2d, c=(128, 128, 128))
            # cv2.imshow(str(image_id), img)
            # cv2.waitKey()
            # Now project the 3D bounding box onto the image
        # pd_2d
        pd_path = pd_dir + '{}.txt'.format(image_id)
        pd = open(pd_path, 'r')

        pd_lines = pd.readlines()
        if len(pd_lines) == 0:
            print("empty gt file")
            continue
        pd_list = removeWhiteSpaceInfile(pd_lines)

        # generate pd_dict_per_img
        for txt_3d in pd_list:
Example #5
            if ymax_projected > img_h - 1:
                ymax_projected = img_h - 1
            projected_box_2d = [
                xmin_projected, ymin_projected, xmax_projected, ymax_projected
            ]
            draw_2d_box(img,
                        xmin_projected,
                        ymin_projected,
                        xmax_projected,
                        ymax_projected,
                        c=(0, 255, 255))

            gt_label_list[3] = projected_box_2d
            gt_dict_per_img[cat_id].append(gt_label_list)

            img = draw_box_3d(img, box_2d, c=(128, 128, 128))
            # cv2.imshow(str(image_id), img)
            # cv2.waitKey()
            # Now project the 3D bounding box onto the image
        # pd_2d
        pd_path_Aug = pd_dir_Aug + '{}.txt'.format(image_id)
        pd_path_not_Aug = pd_dir_Aug + '{}.txt'.format(image_id)
        pd_Aug = open(pd_path_Aug, 'r')

        pd_lines_Aug = pd_Aug.readlines()

        cat_pd, pd_label_list = make_pd_list(pd_lines_Aug)
        if cat_pd == -1:
            continue
        pd_dict_per_img[cat_pd].append(pd_label_list)
        # pd2_bbox per object
Example #6
                    bbox2D_gt_eval = key_val_gt_list[index_gt][3]
                    # world(x, y, z) = cam(x, -z, y)
                    depth_gt = loc_w_gt_eval[1]


                    #cam_loc
                    z = depth_gt
                    x = (center_2d_pd_x * z - calib[0, 2] * z) / calib[0, 0]
                    y = (center_2d_pd_y * z - calib[1, 2] * z) / calib[1, 1]
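                    # Pinhole back-projection: X = (u - cx) * z / fx and
                    # Y = (v - cy) * z / fy, using the predicted 2D center and the
                    # ground-truth depth.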
                    #world_loc
                    pt_3d_world = np.array([x, z, -y], dtype=np.float32)

                    rot_yaw_w_gt = compute_rot_yaw_w(yaw_gt_eval)
                    box_3d_loc_gt = compute_3d_box(dim_w_gt_eval, loc_w_gt_eval, rot_yaw_w_gt)
                    box_2d_loc_gt = project_to_image(box_3d_loc_gt, calib)
                    draw_box_3d(img, box_2d_loc_gt, (128, 128, 128))

                    rot_yaw_w_pd = compute_rot_yaw_w(yaw_pd_eval)
                    cos_pd = np.cos(yaw_pd_eval)
                    sin_pd = np.sin(yaw_pd_eval)
                    # rot_yaw_w_pd = np.array([[cos_pd, -sin_pd, 0], [sin_pd, cos_pd, 0], [0, 0, 1]], dtype=np.float32)
                    loc_test = [loc_w_pd_eval[0], loc_w_gt_eval[1], loc_w_pd_eval[2]]
                    # box_3d_loc_pd = compute_3d_box(dim_w_pd_eval, loc_w_pd_eval, rot_yaw_w_pd)
                    box_3d_loc_pd = compute_3d_box(dim_w_pd_eval, pt_3d_world, rot_yaw_w_pd)
                    box_2d_loc_pd = project_to_image(box_3d_loc_pd, calib)
                    draw_box_3d(img, box_2d_loc_pd, (0, 255, 255))

                    # box_3d_loc = compute_3d_box(dim_w_pd_eval, loc_w_pd_eval, rot_yaw_w_pd)
                    # box_2d_loc = project_to_image(box_3d_loc, calib)
                    # draw_box_3d(img, box_2d_loc, (0, 255, 0))
                ymin_projected = 0
            if xmax_projected >= img_w - 1:
                xmax_projected = img_w - 1
            if ymax_projected >= img_h:
                ymax_projected = img_h - 1
            projected_box_2d = [
                xmin_projected, ymin_projected, xmax_projected, ymax_projected
            ]
            gt_label_list[3] = projected_box_2d
            # cv2.line(img, (int(xmin_projected), int(ymin_projected)), (int(xmax_projected), int(ymin_projected)), (0, 255, 255), 2, lineType=cv2.LINE_AA)
            # cv2.line(img, (int(xmax_projected), int(ymin_projected)), (int(xmax_projected), int(ymax_projected)), (0, 255, 255), 2, lineType=cv2.LINE_AA)
            # cv2.line(img, (int(xmin_projected), int(ymax_projected)), (int(xmin_projected), int(ymin_projected)), (0, 255, 255), 2, lineType=cv2.LINE_AA)
            # cv2.line(img, (int(xmax_projected), int(ymax_projected)), (int(xmin_projected), int(ymax_projected)), (0, 255, 255), 2, lineType=cv2.LINE_AA)

            print(gt_label_list)
            draw_box_3d(img, box_2d, (128, 128, 128))
            gt_dict_per_img[cat_id].append(gt_label_list)
            # cv2.imshow(str(image_id), img)
            # cv2.waitKey()
        # pd_2d
        pd_path = pd_dir + '{}.txt'.format(image_id)
        pd = open(pd_path, 'r')

        pd_lines = pd.readlines()
        if len(pd_lines) == 0:
            print("empty gt file")
            continue
        pd_list = removeWhiteSpaceInfile(pd_lines)

        # generate pd_dict_per_img
        for txt_3d in pd_list:
Example #8
    def create_pc_pillars(self, img, img_info, pc_2d, pc_3d, inp_trans,
                          out_trans):
        pillar_wh = np.zeros((2, pc_3d.shape[1]))
        boxes_2d = np.zeros((0, 8, 2))
        pillar_dim = self.opt.pillar_dims
        v = np.dot(np.eye(3), np.array([1, 0, 0]))
        ry = -np.arctan2(v[2], v[0])
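        # With the identity rotation, v = [1, 0, 0] and ry = 0, so every pillar is
        # placed with zero yaw in the camera frame.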

        for i, center in enumerate(pc_3d[:3, :].T):
            # Create a 3D pillar at pc location for the full-size image
            box_3d = compute_box_3d(dim=pillar_dim,
                                    location=center,
                                    rotation_y=ry)
            box_2d = project_to_image(box_3d, img_info['calib']).T  # [2x8]

            ## save the box for debug plots
            if self.opt.debug:
                box_2d_img, m = self._transform_pc(box_2d,
                                                   inp_trans,
                                                   self.opt.input_w,
                                                   self.opt.input_h,
                                                   filter_out=False)
                boxes_2d = np.concatenate(
                    (boxes_2d, np.expand_dims(box_2d_img.T, 0)), 0)

            # transform points
            box_2d_t, m = self._transform_pc(box_2d, out_trans,
                                             self.opt.output_w,
                                             self.opt.output_h)

            if box_2d_t.shape[1] <= 1:
                continue

            # get the bounding box in [xyxy] format
            bbox = [
                np.min(box_2d_t[0, :]),
                np.min(box_2d_t[1, :]),
                np.max(box_2d_t[0, :]),
                np.max(box_2d_t[1, :])
            ]  # format: xyxy

            # store height and width of the 2D box
            pillar_wh[0, i] = bbox[2] - bbox[0]
            pillar_wh[1, i] = bbox[3] - bbox[1]

        ## DEBUG #################################################################
        if self.opt.debug:
            img_2d = copy.deepcopy(img)
            # img_3d = copy.deepcopy(img)
            img_2d_inp = cv2.warpAffine(img,
                                        inp_trans,
                                        (self.opt.input_w, self.opt.input_h),
                                        flags=cv2.INTER_LINEAR)
            img_2d_out = cv2.warpAffine(img,
                                        out_trans,
                                        (self.opt.output_w, self.opt.output_h),
                                        flags=cv2.INTER_LINEAR)
            img_3d = cv2.warpAffine(img,
                                    inp_trans,
                                    (self.opt.input_w, self.opt.input_h),
                                    flags=cv2.INTER_LINEAR)
            blank_image = 255 * np.ones(
                (self.opt.input_h, self.opt.input_w, 3), np.uint8)
            overlay = img_2d_inp.copy()
            output = img_2d_inp.copy()

            pc_inp, _ = self._transform_pc(pc_2d, inp_trans, self.opt.input_w,
                                           self.opt.input_h)
            pc_out, _ = self._transform_pc(pc_2d, out_trans, self.opt.output_w,
                                           self.opt.output_h)

            pill_wh_inp = pillar_wh * (self.opt.input_w / self.opt.output_w)
            pill_wh_out = pillar_wh
            pill_wh_ori = pill_wh_inp * 2
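            # Assumes the original image is roughly twice the network input
            # resolution, hence the factor of 2 for original-size pillar boxes.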

            for i, p in enumerate(pc_inp[:3, :].T):
                color = int((p[2].tolist() / 60.0) * 255)
                color = (0, color, 0)

                # Clamp the rectangle corners to non-negative pixel coordinates.
                rect_tl = (max(int(p[0] - pill_wh_inp[0, i] / 2), 0),
                           max(int(p[1] - pill_wh_inp[1, i]), 0))
                rect_br = (max(int(p[0] + pill_wh_inp[0, i] / 2), 0), int(p[1]))
                cv2.rectangle(img_2d_inp,
                              rect_tl,
                              rect_br, (0, 0, 255),
                              1,
                              lineType=cv2.LINE_AA)
                img_2d_inp = cv2.circle(img_2d_inp, (int(p[0]), int(p[1])), 3,
                                        color, -1)

                ## On original-sized image
                rect_tl_ori = (max(int(pc_2d[0, i] - pill_wh_ori[0, i] / 2), 0),
                               max(int(pc_2d[1, i] - pill_wh_ori[1, i]), 0))
                rect_br_ori = (max(int(pc_2d[0, i] + pill_wh_ori[0, i] / 2), 0),
                               int(pc_2d[1, i]))
                cv2.rectangle(img_2d,
                              rect_tl_ori,
                              rect_br_ori, (0, 0, 255),
                              2,
                              lineType=cv2.LINE_AA)
                img_2d = cv2.circle(img_2d,
                                    (int(pc_2d[0, i]), int(pc_2d[1, i])), 6,
                                    color, -1)

                p2 = pc_out[:3, i].T
                rect_tl2 = (max(int(p2[0] - pill_wh_out[0, i] / 2), 0),
                            max(int(p2[1] - pill_wh_out[1, i]), 0))
                rect_br2 = (max(int(p2[0] + pill_wh_out[0, i] / 2), 0), int(p2[1]))
                cv2.rectangle(img_2d_out,
                              rect_tl2,
                              rect_br2, (0, 0, 255),
                              1,
                              lineType=cv2.LINE_AA)
                img_2d_out = cv2.circle(img_2d_out, (int(p2[0]), int(p2[1])), 3,
                                        (255, 0, 0), -1)

                # on blank image
                cv2.rectangle(blank_image,
                              rect_tl,
                              rect_br,
                              color,
                              -1,
                              lineType=cv2.LINE_AA)

                # overlay
                alpha = 0.1
                cv2.rectangle(overlay,
                              rect_tl,
                              rect_br,
                              color,
                              -1,
                              lineType=cv2.LINE_AA)
                cv2.addWeighted(overlay, alpha, output, 1 - alpha, 0, output)

                # plot 3d pillars
                img_3d = draw_box_3d(img_3d,
                                     boxes_2d[i].astype(np.int32),
                                     [114, 159, 207],
                                     same_color=False)

            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_inp.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d_inp)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_ori.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_out.' + self.opt.img_format) \
                        .format(self.img_ind), img_2d_out)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_blank.' + self.opt.img_format) \
                        .format(self.img_ind), blank_image)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_2d_overlay.' + self.opt.img_format) \
                        .format(self.img_ind), output)
            cv2.imwrite((self.opt.debug_dir + '/{}pc_pillar_3d.' + self.opt.img_format) \
                        .format(self.img_ind), img_3d)
            self.img_ind += 1
        ## DEBUG #################################################################
        return pillar_wh
            "nightstand": [],
            "sofa": [],
            "table": [],
            "toilet": []
        }

        anns = open(ann_path, 'r')
        anns_lines = anns.readlines()
        anns_list = removeWhiteSpaceInfile(anns_lines)
        # generate gt_dict_per_img
        for txt in anns_list:
            tmp = txt.split(',')  # whitespace removed
            cat_id, gt_label_list, box_3d = read_gt_label(tmp, Rtilt)
            gt_dict_per_img[cat_id].append(gt_label_list)
            box_2d = project_to_image(box_3d, calib)
            img = draw_box_3d(img, box_2d, c=(128, 128, 128))
            # cv2.imshow(str(image_id), img)
            # cv2.waitKey()
            # Now project the 3D bounding box onto the image
        # pd_2d
        pd_path = pd_dir + '{}.txt'.format(image_id)
        pd = open(pd_path, 'r')

        pd_lines = pd.readlines()
        if len(pd_lines) == 0:
            print("empty gt file")
            continue
        pd_list = removeWhiteSpaceInfile(pd_lines)

        # generate pd_dict_per_img
        for txt_3d in pd_list:
Example #10
        pd_list = removeWhiteSpaceInfile(pd_lines)

        # generate pd_dict_per_img
        for txt_3d in pd_list:
            tmp_pd = txt_3d[1:-1].split(',')
            cat_pd, pd_label_list, box_3d_pd, loc_w_pd_tilt = read_pd_label(tmp_pd, Rtilt)
            # if cat_pd != 'table' and cat_pd != 'desk':
            #     continue
            yaw_pd = str(pd_label_list[2] * 180 / math.pi)[:4]
            cl = colors_jw[ID.get(cat_pd)]
            box_2d_pd = project_to_image(box_3d_pd, calib)
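            # Convert the predicted location from world to camera coordinates
            # (world (x, y, z) -> cam (x, -z, y)) before projecting it onto the image.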
            loc_c_pd_tilt = [loc_w_pd_tilt[i] for i in [0,2,1]]
            loc_c_pd_tilt[1] = -loc_c_pd_tilt[1]
            loc_2d_pd = project_to_image(np.array([loc_c_pd_tilt]), calib)
            if cat_pd == 'table' or cat_pd == 'desk':
                img = draw_box_3d(img, box_2d_pd, c=cl)

                # cv2.circle(img, (int(loc_2d_pd[0][0]), int(loc_2d_pd[0][1])), 5, cl, -1)

                rot_end_x_pd = loc_2d_pd[0][0] + np.cos(pd_label_list[2]) * 50
                rot_end_y_pd = loc_2d_pd[0][1] - np.sin(pd_label_list[2]) * 50
                cv2.arrowedLine(img, (int(loc_2d_pd[0][0]), int(loc_2d_pd[0][1])),
                                (int(rot_end_x_pd), int(rot_end_y_pd)), cl, 2)
                # cv2.circle(self.imgs[img_id], (int(ct_x), int(ct_y)), 10, (255, 0, 0), -1)
                print("pred_yawAngle:{}".format(yaw_pd))
                labelSize = cv2.getTextSize(yaw_pd, cv2.FONT_HERSHEY_COMPLEX, 0.5, 4)
                # show : class
                _x1 = int(loc_2d_pd[0][0]) + 5
                _y1 = int(loc_2d_pd[0][1]) - 3
                _x2 = _x1 + labelSize[0][0] + 15
                _y2 = _y1 + labelSize[0][1] + 5