Example #1
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if not (split in ['mini', 'test']) and \
        not (scene_name in SCENE_SPLITS[split]):
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Build the sensor-to-global transform by chaining the ego pose
          # (car -> global) with the sensor calibration (sensor -> car).
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
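          # trans_matrix maps homogeneous points from the camera sensor frame
          # to the nuScenes global frame (sensor -> ego car -> global).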
          trans_matrix = np.dot(global_from_car, car_from_sensor)
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
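          # calib is the 3x4 projection matrix [K | 0] (KITTI-style camera calib).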
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
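            # Heading: rotate the box's local x-axis into camera coordinates and
            # take its angle around the camera y-axis (KITTI rotation_y).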
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
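            # Shift the 3D center to the bottom face of the box (KITTI convention:
            # the location is the bottom center; y points down in camera coords).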
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if not (instance_token in track_ids):
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
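            # Rotate the global-frame velocity into the camera frame; the homogeneous
            # w component is 0, so the translation part of the transform drops out.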
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Filter out boxes that are fully occluded: drop any box whose 2D bbox
          # lies entirely inside the bbox of a strictly closer box.
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
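    # Regroup images by (video_id, sensor_id) so that each camera stream ends up
    # stored contiguously in the output annotation file.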
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    with open(out_path, 'w') as f:
      json.dump(ret, f)
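Both examples rely on module-level imports and configuration constants that are defined elsewhere in the conversion script. The sketch below is a minimal illustration of what those definitions might look like: the imports follow the nuScenes devkit layout, while the constant values (paths, category order, scene splits) are placeholders rather than the authors' exact configuration. Project-local helpers such as project_to_image, compute_box_3d, draw_box_3d, unproject_2d_to_3d, _rot_y2alpha and _bbox_inside are also assumed to be defined in the same script (a sketch of the last two follows Example #2).

# Third-party imports used by main() (nuScenes devkit, pyquaternion, numpy, OpenCV).
import copy
import json
import os

import cv2
import numpy as np
import matplotlib.pyplot as plt
from pyquaternion import Quaternion
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import BoxVisibility, transform_matrix
from nuscenes.utils.kitti import KittiDB
from nuscenes.eval.detection.utils import category_to_detection_name

# Illustrative configuration; the concrete values are assumptions and must be
# adapted to the local dataset layout and the splits being converted.
DEBUG = False
DATA_PATH = 'data/nuscenes/'               # hypothetical dataset root
OUT_PATH = DATA_PATH + 'annotations/'      # hypothetical output directory
SPLITS = {'val': 'v1.0-trainval', 'test': 'v1.0-test'}  # split name -> devkit version
USED_SENSOR = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT',
               'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_FRONT_LEFT']
SENSOR_ID = {name: i + 1 for i, name in enumerate(USED_SENSOR)}
CATS = ['car', 'truck', 'bus', 'trailer', 'construction_vehicle',
        'pedestrian', 'motorcycle', 'bicycle', 'traffic_cone', 'barrier']
CAT_IDS = {cat: i + 1 for i, cat in enumerate(CATS)}
ATTRIBUTE_TO_ID = {'': 0}  # extended with the nuScenes attribute names in practice
SCENE_SPLITS = {'train': [], 'val': []}    # lists of scene names per split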
Example #2
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{
            "name": CATS[i],
            "id": i + 1
        } for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if not (split in ["mini", "test"
                              ]) and not (scene_name in SCENE_SPLITS[split]):
                continue
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Build the sensor-to-global transform by chaining the ego pose
                    # (car -> global) with the sensor calibration (sensor -> car).
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id": num_anns,
                            "image_id": num_images,
                            "category_id": category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location": [box.center[0], box.center[1], box.center[2]],
                            "depth": box.center[2],
                            "occluded": 0,
                            "truncated": 0,
                            "rotation_y": yaw,
                            "amodel_center": amodel_center,
                            "iscrowd": 0,
                            "track_id": track_id,
                            "attributes": ATTRIBUTE_TO_ID[att],
                            "velocity": vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Filter out boxes that are fully occluded: drop any box whose
                    # 2D bbox lies entirely inside the bbox of a strictly closer box.
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if anns[i]["depth"] - min(anns[i][
                                    "dim"]) / 2 > anns[j]["depth"] + max(
                                        anns[j]["dim"]) / 2 and _bbox_inside(
                                            anns[i]["bbox"], anns[j]["bbox"]):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        json.dump(ret, open(out_path, "w"))