def _fancy_deep_learning(frame):
    """Creates a prediction objects file."""
    o_list = []

    for camera_labels in frame.camera_labels:
        if camera_labels.name != 1:  # Only use the front camera (CameraName.FRONT == 1)
            continue
        for gt_label in camera_labels.labels:
            o = metrics_pb2.Object()
            # The following 3 fields are used to uniquely identify a frame a prediction
            # is predicted at.
            o.context_name = frame.context.name
            # The frame timestamp for the prediction. See Frame::timestamp_micros in
            # dataset.proto.
            o.frame_timestamp_micros = frame.timestamp_micros
            # This is only needed for 2D detection or tracking tasks.
            # Set it to the camera name the prediction is for.
            o.camera_name = camera_labels.name

            # Populating box and score.
            box = label_pb2.Label.Box()
            box.center_x = gt_label.box.center_x
            box.center_y = gt_label.box.center_y
            box.length = gt_label.box.length
            box.width = gt_label.box.width
            o.object.box.CopyFrom(box)
            # This must be within [0.0, 1.0]. It is better to filter those boxes with
            # small scores to speed up metrics computation.
            o.score = 0.9
            # Use correct type.
            o.object.type = gt_label.type
            o_list.append(o)

    return o_list
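A minimal driver sketch for the snippet above (an assumption, not part of the original): it reads one segment TFRecord, collects the per-frame front-camera objects returned by _fancy_deep_learning, and serializes them into a .bin file. The paths are placeholders.

import tensorflow as tf
from waymo_open_dataset import dataset_pb2
from waymo_open_dataset.protos import metrics_pb2


def write_front_camera_gt(tfrecord_path, out_path):
    # Collect metrics_pb2.Object protos for every frame in the segment.
    objects = metrics_pb2.Objects()
    dataset = tf.data.TFRecordDataset(tfrecord_path, compression_type='')
    for data in dataset:
        frame = dataset_pb2.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        objects.objects.extend(_fancy_deep_learning(frame))
    # Write the serialized Objects proto, as in the later examples.
    with open(out_path, 'wb') as f:
        f.write(objects.SerializeToString())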
Example #2
    def _load_meta(self, frame, camera_id):

        o = metrics_pb2.Object()
        o.camera_name = camera_id
        o.context_name = frame.context.name
        o.frame_timestamp_micros = frame.timestamp_micros
        return o
Example #3
def createsubmisioncameraobject(objects, boxes, pred_cls, scores, context_name,
                                frame_timestamp_micros, camera_name):
    total_boxes = len(boxes)
    for i in range(total_boxes):  # patch in pred_bbox:
        label = pred_cls[i]
        #(center_x, center_y, width, height) in image size
        bbox = boxes[i]  #[1246.5217, 750.64905, 113.49747, 103.9653]
        score = scores[i]
        o = metrics_pb2.Object()
        o.context_name = context_name  # frame.context.name
        # frame.timestamp_micros)
        o.frame_timestamp_micros = int(frame_timestamp_micros)
        o.camera_name = camera_name  #dataset_pb2.CameraName.FRONT
        o.score = score

        # Populating box and score.
        box = label_pb2.Label.Box()
        box.center_x = bbox[0]
        box.center_y = bbox[1]
        box.width = bbox[2]
        box.length = bbox[3]
        # box.length = bbox[1][0] - bbox[0][0]
        # box.width = bbox[1][1] - bbox[0][1]
        # box.center_x = bbox[0][0] + box.length * 0.5
        # box.center_y = bbox[0][1] + box.width * 0.5

        o.object.box.CopyFrom(box)
        o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
        o.object.num_lidar_points_in_box = 100
        # INSTANCE_CATEGORY_NAMES.index(label) #INSTANCE_pb2[label]
        o.object.type = INSTANCEindex_pb2[label]  #INSTANCE_pb2[label]
        # print(
        #     f'Object type label: {label}, {INSTANCE_pb2[label]}, {INSTANCE_CATEGORY_NAMES.index(label)}')
        assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
        objects.objects.append(o)
Example #4
def format_tracking_result(box, context_name, timestamp_micros):
    """ Convert a Bbox3D into an instance of class metrics_pb2.Object so that it can be serialized in
    Waymo OD 's format

    Args:
        box (Bbox3D): box converted from tracklet's State
        context_name (str): name of context
        timestamp_micros (int): time stamp of the frame where this box appears
    Returns:
        metrics_pb2.Object
    """
    assert box.frame == 'ego_vehicle', "box must be in 'ego_vehicle' frame, while right now it is in {}".format(
        box.frame)
    o = metrics_pb2.Object()
    o.context_name = context_name
    o.frame_timestamp_micros = timestamp_micros
    # populate box & score
    waymo_box = label_pb2.Label.Box()
    waymo_box.center_x = box.center[0]
    waymo_box.center_y = box.center[1]
    waymo_box.center_z = box.center[2]
    waymo_box.length = box.l
    waymo_box.width = box.w
    waymo_box.height = box.h
    waymo_box.heading = box.yaw
    o.object.box.CopyFrom(waymo_box)
    o.score = box.score
    o.object.id = '{}-{}'.format(box.obj_type, box.id)
    o.object.type = waymo_tracking_names[box.obj_type]
    return o
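The Bbox3D type and the waymo_tracking_names mapping used above are not defined in this snippet. A hypothetical stand-in, shown only to illustrate which fields format_tracking_result reads; all names and values below are assumptions.

from dataclasses import dataclass
from waymo_open_dataset import label_pb2

# Assumed mapping from tracker class names to Waymo label types.
waymo_tracking_names = {'vehicle': label_pb2.Label.TYPE_VEHICLE,
                        'pedestrian': label_pb2.Label.TYPE_PEDESTRIAN,
                        'cyclist': label_pb2.Label.TYPE_CYCLIST}


@dataclass
class Bbox3D:
    """Hypothetical box container matching the attributes accessed above."""
    center: tuple          # (x, y, z) in the ego_vehicle frame
    l: float               # length
    w: float               # width
    h: float               # height
    yaw: float             # heading
    score: float
    obj_type: str
    id: int
    frame: str = 'ego_vehicle'


box = Bbox3D(center=(12.0, 1.5, 0.8), l=4.5, w=1.9, h=1.6,
             yaw=0.1, score=0.8, obj_type='vehicle', id=7)
o = format_tracking_result(box, context_name='<context_name>', timestamp_micros=0)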
Example #5
def create_waymo_sumbit(waymo_annos, out_path):
    print('Creating Waymo submission...')
    objects = metrics_pb2.Objects()

    for token in tqdm(waymo_annos):
        for waymo_anno in waymo_annos[token]:
            o = metrics_pb2.Object()
            o.context_name = waymo_anno['context_name']
            o.frame_timestamp_micros = waymo_anno['frame_timestamp_micros']

            box = label_pb2.Label.Box()
            box.center_x = waymo_anno['ego_box_3d'][0]
            box.center_y = waymo_anno['ego_box_3d'][1]
            box.center_z = waymo_anno['ego_box_3d'][2]
            box.length = waymo_anno['dimension'][2]
            box.width = waymo_anno['dimension'][1]
            box.height = waymo_anno['dimension'][0]
            box.heading = waymo_anno['ego_heading']
            o.object.box.CopyFrom(box)

            o.score = waymo_anno['score']
            o.object.id = str(waymo_anno['object_id'])
            o.object.type = waymo_anno['cat']

            o.object.num_lidar_points_in_box = 100

            objects.objects.append(o)

    # Write objects to a file.
    with open(out_path, 'wb') as f:
        f.write(objects.SerializeToString())
Example #6
def create_object(frame_features, x_max, x_min, y_max, y_min, label, difficulty=None, score=None):
    obj = metrics_pb2.Object()

    def create_label(img_width, img_height, x_max, x_min, y_max, y_min, label_type, difficulty=None):
        lab = label_pb2.Label()
        lab.box.center_x = (x_max + x_min) / 2 * img_width
        lab.box.center_y = (y_max + y_min) / 2 * img_height
        lab.box.length = (x_max - x_min) * img_width
        lab.box.width = (y_max - y_min) * img_height

        lab.type = 4 if label_type == 3 else label_type  # Revert cyclist label to 4
        if difficulty is not None:
            lab.detection_difficulty_level = 1 if difficulty == 0 else difficulty
        return lab

    label = create_label(frame_features['image/width'], frame_features['image/height'],
                         x_max, x_min, y_max, y_min, label, difficulty)
    obj.object.MergeFrom(label)
    if score:
        obj.score = score
    obj.context_name = frame_features["image/context_name"]
    obj.frame_timestamp_micros = frame_features["image/frame_timestamp_micros"]
    obj.camera_name = frame_features["image/camera_name"]

    return obj
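A hedged call example for create_object: the dictionary keys match the ones the function reads above, the box corners are normalized to [0, 1], and all concrete values are placeholders.

# Placeholder frame features; in practice these come from a decoded TF Example.
frame_features = {
    'image/width': 1920,
    'image/height': 1280,
    'image/context_name': '<context_name>',
    'image/frame_timestamp_micros': 0,
    'image/camera_name': 1,  # dataset_pb2.CameraName.FRONT
}
# label 1 corresponds to label_pb2.Label.TYPE_VEHICLE.
obj = create_object(frame_features, x_max=0.6, x_min=0.4, y_max=0.7, y_min=0.5,
                    label=1, difficulty=0, score=0.9)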
Example #7
def tfRecordToBin(data_file_name, data_type):

    if data_type == 'vehicle':
        data_type_label = label_pb2.Label.TYPE_VEHICLE
    elif data_type == 'pedestrian':
        data_type_label = label_pb2.Label.TYPE_PEDESTRIAN
    elif data_type == 'cyclist':
        data_type_label = label_pb2.Label.TYPE_CYCLIST
    else:
        print("Usage: python tfRecordDataToLabel.py data.tfrecord vehicle")
        sys.exit(1)

    result_file_name = data_file_name[:-9] + "_" + data_type + "_gt" + '.bin'

    objs = metrics_pb2.Objects()

    dataset = tf.data.TFRecordDataset(data_file_name, compression_type='')
    for data in dataset:
        frame = dataset_pb2.Frame()
        frame.ParseFromString(bytearray(data.numpy()))
        for frame_obj in frame.laser_labels:
            if not frame_obj.type == data_type_label:
                continue
            obj = metrics_pb2.Object()
            obj.object.box.CopyFrom(frame_obj.box)
            obj.object.type = frame_obj.type
            obj.object.id = frame_obj.id
            obj.context_name = frame.context.name
            obj.frame_timestamp_micros = frame.timestamp_micros
            objs.objects.append(obj)

    with open(result_file_name, 'wb') as f:
        f.write(objs.SerializeToString())
Example #8
    def anns2proto(self, outfile_prefix):
        anns = self.coco.anns
        ground_truths = metrics_pb2.Objects()

        for _, ann in anns.items():
            obj = metrics_pb2.Object()

            img_info = self.data_infos[ann['image_id']]

            x1, y1, w, h = ann['bbox']

            cx = x1 + w / 2
            cy = y1 + h / 2

            lab = label_pb2.Label()
            lab.box.center_x = cx
            lab.box.center_y = cy
            lab.box.length = w
            lab.box.width = h
            lab.type = 4 if ann['category_id'] == 3 else ann['category_id']
            lab.detection_difficulty_level = 1 if ann[
                'det_difficult'] == 0 else ann['det_difficult']
            obj.object.MergeFrom(lab)

            obj.context_name = img_info["context_name"]
            obj.frame_timestamp_micros = img_info["timestamp_micros"]
            obj.camera_name = img_info["camera_id"]

            ground_truths.objects.append(obj)

        f = open(outfile_prefix + "_gt.bin", 'wb')
        serialized = ground_truths.SerializeToString()
        f.write(serialized)
        f.close()
Example #9
def createsubmisionobject(objects, boxes, pred_cls, scores, context_name,
                          frame_timestamp_micros):
    total_boxes = len(boxes)
    for i in range(total_boxes):  #patch in pred_bbox:
        label = pred_cls[i]
        bbox = boxes[i]
        score = scores[i]
        o = metrics_pb2.Object()
        o.context_name = context_name  #frame.context.name
        o.frame_timestamp_micros = int(
            frame_timestamp_micros)  #frame.timestamp_micros)
        o.camera_name = dataset_pb2.CameraName.FRONT
        o.score = score

        # Populating box and score.
        box = label_pb2.Label.Box()
        box.length = bbox[1][0] - bbox[0][0]
        box.width = bbox[1][1] - bbox[0][1]
        box.center_x = bbox[0][0] + box.length * 0.5
        box.center_y = bbox[0][1] + box.width * 0.5

        o.object.box.CopyFrom(box)
        o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
        o.object.num_lidar_points_in_box = 100
        o.object.type = INSTANCE_pb2[
            label]  # INSTANCE_CATEGORY_NAMES.index(label) #INSTANCE_pb2[label]
        print(
            f'Object type label: {label}, {INSTANCE_pb2[label]}, {INSTANCE_CATEGORY_NAMES.index(label)}'
        )
        assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
        objects.objects.append(o)
Example #10
def _create_gt_detection(infos, tracking=True):
    """Creates a gt prediction object file for local evaluation."""
    from waymo_open_dataset import label_pb2
    from waymo_open_dataset.protos import metrics_pb2
    
    objects = metrics_pb2.Objects()

    for idx in tqdm(range(len(infos))): 
        info = infos[idx]

        obj = get_obj(info['path'])
        annos = obj['objects']
        num_points_in_gt = np.array([ann['num_points'] for ann in annos])
        box3d = np.array([ann['box'] for ann in annos])

        if len(box3d) == 0:
            continue 

        names = np.array([TYPE_LIST[ann['label']] for ann in annos])

        box3d = box3d[:, [0, 1, 2, 3, 4, 5, -1]]

        for i in range(box3d.shape[0]):
            if num_points_in_gt[i] == 0:
                continue 
            if names[i] == 'UNKNOWN':
                continue 

            det  = box3d[i]
            score = 1.0
            label = names[i]

            o = metrics_pb2.Object()
            o.context_name = obj['scene_name']
            o.frame_timestamp_micros = int(obj['frame_name'].split("_")[-1])

            # Populating box and score.
            box = label_pb2.Label.Box()
            box.center_x = det[0]
            box.center_y = det[1]
            box.center_z = det[2]
            box.length = det[3]
            box.width = det[4]
            box.height = det[5]
            box.heading = det[-1]
            o.object.box.CopyFrom(box)
            o.score = score
            # Use correct type.
            o.object.type = CAT_NAME_TO_ID[label]
            o.object.num_lidar_points_in_box = num_points_in_gt[i]
            o.object.id = annos[i]['name']

            objects.objects.append(o)
        
    # Write objects to a file.
    f = open(os.path.join(args.result_path, 'gt_preds.bin'), 'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #11
def create_result(pose_v, theta_v, trackers, tracking_name, scene_token,
                  current_sample_token):
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify a frame a prediction
    # is predicted at. Make sure you set them to values exactly the same as what
    # we provided in the raw data. Otherwise your prediction is considered as a
    # false positive.
    o.context_name = scene_token
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    o.frame_timestamp_micros = current_sample_token
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    #o.camera_name = dataset_pb2.CameraName.FRONT

    # Populating box and score.
    box = label_pb2.Label.Box()
    box.center_x = pose_v[0]
    box.center_y = pose_v[1]
    box.center_z = pose_v[2]
    box.length = trackers[2]
    box.width = trackers[1]
    box.height = trackers[0]
    box.heading = 0
    o.object.box.CopyFrom(box)
    # This must be within [0.0, 1.0]. It is better to filter those boxes with
    # small scores to speed up metrics computation.
    o.score = 0.5
    # For tracking, this must be set and it must be unique for each tracked
    # sequence.
    # tracking_id = np.array(trackers[7])
    # tracking_id = tracking_id.tobytes()
    # tracking_id = tracking_id.encode('utf-8')
    o.object.id = str(int(trackers[7]))
    # Use correct type.
    if tracking_name == 1:
        o.object.type = label_pb2.Label.TYPE_VEHICLE
    elif tracking_name == 2:
        o.object.type = label_pb2.Label.TYPE_PEDESTRIAN
    elif tracking_name == 4:
        o.object.type = label_pb2.Label.TYPE_CYCLIST
    objects.objects.append(o)

    # Add more objects. Note that a reasonable detector should limit its maximum
    # number of boxes predicted per frame. A reasonable value is around 400. A
    # huge number of boxes can slow down metrics computation.

    # Write objects to a file.
    f = open('/home/shk642/waymo/waymo-od/waymo-dataset-viewer/tmp/pred00.bin',
             'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #12
    def dump_detection_output(self, idx: Union[int, tuple],
                              detections: Target3DArray,
                              fout: RawIOBase) -> None:
        '''
        :param detections: detection result
        :param ids: auxiliary information for output, each item contains context name and timestamp
        :param fout: output file-like object
        '''
        try:
            from waymo_open_dataset import label_pb2
            from waymo_open_dataset.protos import metrics_pb2
        except ImportError:
            _logger.error(
                "Cannot find waymo_open_dataset, install the package at "
                "https://github.com/waymo-research/waymo-open-dataset, output will be skipped now."
            )
            return

        label_map = {
            WaymoObjectClass.Unknown: label_pb2.Label.TYPE_UNKNOWN,
            WaymoObjectClass.Vehicle: label_pb2.Label.TYPE_VEHICLE,
            WaymoObjectClass.Pedestrian: label_pb2.Label.TYPE_PEDESTRIAN,
            WaymoObjectClass.Sign: label_pb2.Label.TYPE_SIGN,
            WaymoObjectClass.Cyclist: label_pb2.Label.TYPE_CYCLIST
        }

        waymo_array = metrics_pb2.Objects()
        for target in detections:
            waymo_target = metrics_pb2.Object()

            # convert box parameters
            box = label_pb2.Label.Box()
            box.center_x = target.position[0]
            box.center_y = target.position[1]
            box.center_z = target.position[2]
            box.length = target.dimension[0]
            box.width = target.dimension[1]
            box.height = target.dimension[2]
            box.heading = target.yaw
            waymo_target.object.box.CopyFrom(box)

            # convert label
            waymo_target.object.type = label_map[target.tag_top]
            waymo_target.score = target.tag_top_score

            waymo_target.context_name = idx[
                0]  # the name of the sequence is the context
            waymo_target.frame_timestamp_micros = int(
                self.timestamp(idx) * 1e6)
            waymo_array.objects.append(waymo_target)

        bindata = waymo_array.SerializeToString()
        if isinstance(fout, (str, Path)):
            Path(fout).write_bytes(bindata)
        else:
            fout.write(bindata)
Example #13
def _create_pd_detection(detections, infos, result_path, tracking=False):
    """Creates a prediction objects file."""
    assert tracking is False, "Not Supported Yet"
    from waymo_open_dataset import dataset_pb2
    from waymo_open_dataset import label_pb2
    from waymo_open_dataset.protos import metrics_pb2
    from waymo_open_dataset.utils import box_utils

    objects = metrics_pb2.Objects()

    for token, detection in tqdm(detections.items()):
        info = infos[token]
        obj = get_obj(info['path'])

        box3d = detection["box3d_lidar"].detach().cpu().numpy()
        scores = detection["scores"].detach().cpu().numpy()
        labels = detection["label_preds"].detach().cpu().numpy()
        box3d[:, -1] = -box3d[:, -1] - np.pi / 2

        if box3d.shape[1] > 7:
            # drop velocity
            box3d = box3d[:, [0, 1, 2, 3, 4, 5, -1]]

        for i in range(box3d.shape[0]):
            det = box3d[i]
            score = scores[i]

            label = labels[i]

            o = metrics_pb2.Object()
            o.context_name = obj['scene_name']
            o.frame_timestamp_micros = int(obj['frame_name'].split("_")[-1])

            # Populating box and score.
            box = label_pb2.Label.Box()
            box.center_x = det[0]
            box.center_y = det[1]
            box.center_z = det[2]
            box.length = det[3]
            box.width = det[4]
            box.height = det[5]
            box.heading = det[-1]
            o.object.box.CopyFrom(box)
            o.score = score
            # Use correct type.
            o.object.type = label_to_type(label)  # int(label)+1

            objects.objects.append(o)

    # Write objects to a file.
    f = open(os.path.join(result_path, 'my_preds.bin'), 'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #14
        def parse_one_object(instance_idx):
            """Parse one instance in kitti format and convert them to `Object`
            proto.

            Args:
                instance_idx (int): Index of the instance to be converted.

            Returns:
                :obj:`Object`: Predicted instance in waymo dataset \
                    Object proto.
            """
            cls = kitti_result['name'][instance_idx]
            length = round(kitti_result['dimensions'][instance_idx, 0], 4)
            height = round(kitti_result['dimensions'][instance_idx, 1], 4)
            width = round(kitti_result['dimensions'][instance_idx, 2], 4)
            x = round(kitti_result['location'][instance_idx, 0], 4)
            y = round(kitti_result['location'][instance_idx, 1], 4)
            z = round(kitti_result['location'][instance_idx, 2], 4)
            rotation_y = round(kitti_result['rotation_y'][instance_idx], 4)
            score = round(kitti_result['score'][instance_idx], 4)

            # y: downwards; move box origin from bottom center (kitti) to
            # true center (waymo)
            y -= height / 2
            # frame transformation: kitti -> waymo
            x, y, z = self.transform(T_k2w, x, y, z)

            # different conventions
            heading = -(rotation_y + np.pi / 2)
            while heading < -np.pi:
                heading += 2 * np.pi
            while heading > np.pi:
                heading -= 2 * np.pi

            box = label_pb2.Label.Box()
            box.center_x = x
            box.center_y = y
            box.center_z = z
            box.length = length
            box.width = width
            box.height = height
            box.heading = heading

            o = metrics_pb2.Object()
            o.object.box.CopyFrom(box)
            o.object.type = self.k2w_cls_map[cls]
            o.score = score

            o.context_name = context_name
            o.frame_timestamp_micros = frame_timestamp_micros

            return o
Example #15
def convert(obj, context_name, frame_timestamp_micros):

    o = metrics_pb2.Object()
    o.object.box.CopyFrom(obj.box)
    o.object.type = obj.type
    o.score = 1.0
    o.object.num_lidar_points_in_box = obj.num_lidar_points_in_box  # needed for gt generation

    # for identification of the frame
    o.context_name = context_name
    o.frame_timestamp_micros = frame_timestamp_micros

    return o
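A hedged sketch of how convert might be applied to build a ground-truth file from a segment's LiDAR labels, mirroring the TFRecord loop of Example #7; the paths are placeholders, not part of the original snippet.

import tensorflow as tf
from waymo_open_dataset import dataset_pb2
from waymo_open_dataset.protos import metrics_pb2

objects = metrics_pb2.Objects()
dataset = tf.data.TFRecordDataset('segment.tfrecord', compression_type='')
for data in dataset:
    frame = dataset_pb2.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    # One Object proto per LiDAR label, tagged with the frame identifiers.
    for laser_label in frame.laser_labels:
        objects.objects.append(
            convert(laser_label, frame.context.name, frame.timestamp_micros))

with open('gt.bin', 'wb') as f:
    f.write(objects.SerializeToString())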
Example #16
def _create_pd_file_example():
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify a frame a prediction
    # is predicted at. Make sure you set them to values exactly the same as what
    # we provided in the raw data. Otherwise your prediction is considered as a
    # false positive.
    o.context_name = (
        'context_name for the prediction. See Frame::context::name '
        'in  dataset.proto.')
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    invalid_ts = -1
    o.frame_timestamp_micros = invalid_ts
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.FRONT

    # Populating box and score.
    box = label_pb2.Label.Box()
    box.center_x = 0
    box.center_y = 0
    box.center_z = 0
    box.length = 0
    box.width = 0
    box.height = 0
    box.heading = 0
    o.object.box.CopyFrom(box)
    # This must be within [0.0, 1.0]. It is better to filter those boxes with
    # small scores to speed up metrics computation.
    o.score = 0.5
    # For tracking, this must be set and it must be unique for each tracked
    # sequence.
    o.object.id = 'unique object tracking ID'
    # Use correct type.
    o.object.type = label_pb2.Label.TYPE_PEDESTRIAN

    objects.objects.append(o)

    # Add more objects. Note that a reasonable detector should limit its maximum
    # number of boxes predicted per frame. A reasonable value is around 400. A
    # huge number of boxes can slow down metrics computation.

    # Write objects to a file.
    f = open('/tmp/your_preds.bin', 'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #17
def create_gt_obj(frame, label, view):

    o = metrics_pb2.Object()
    o.context_name = frame.context.name
    o.frame_timestamp_micros = frame.timestamp_micros
    o.camera_name = view

    box = label_pb2.Label.Box()
    box.center_x, box.center_y = label.box.center_x, label.box.center_y
    box.length, box.width = label.box.length, label.box.width
    o.object.box.CopyFrom(box)

    o.object.type = label.type

    return o
Example #18
        def parse_one_object(line):
            attrs = line.split()

            cls = attrs[0]
            height = float(attrs[8])
            width = float(attrs[9])
            length = float(attrs[10])
            x = float(attrs[11])
            y = float(attrs[12])
            z = float(attrs[13])
            rotation_y = float(attrs[14])
            score = float(attrs[15])

            # y: downwards; move box origin from bottom center (kitti) to true center (waymo)
            y -= height / 2
            x, y, z = self.transform(T_k2w, x, y,
                                     z)  # frame transformation: kitti -> waymo

            # different conventions
            heading = -(rotation_y + np.pi / 2)
            while heading < -np.pi:
                heading += 2 * np.pi
            while heading > np.pi:
                heading -= 2 * np.pi

            # populate box
            box = label_pb2.Label.Box()
            box.center_x = x
            box.center_y = y
            box.center_z = z
            box.length = length
            box.width = width
            box.height = height
            box.heading = heading

            o = metrics_pb2.Object()
            o.object.box.CopyFrom(box)
            o.object.type = self.k2w_cls_map[cls]
            o.score = score

            # for identification of the frame
            o.context_name = context_name
            o.frame_timestamp_micros = frame_timestamp_micros

            return o
Example #19
def _create_pd_file_example(path, json_data, objects):
    """Creates a prediction objects file."""
    kitti_file = open(path)
    for line in kitti_file.readlines():
        line = line.strip('\n').split()
        if line[0] == 'unknown' or line[0] == 'Sign':
            continue
        if line[15] == '0':
            continue
        o = metrics_pb2.Object()
        o.context_name = json_data["context_name"]
        o.frame_timestamp_micros = json_data["frame_timestamp_micros"]
        # if int(line[15]) > 5:
        #     o.difficulty = 1
        # else:
        #     o.difficulty = 2
        box = label_pb2.Label.Box()
        box.center_x = float(line[11])
        box.center_y = float(line[12])
        box.center_z = float(line[13])
        box.length = float(line[10])
        box.width = float(line[9])
        box.height = float(line[8])
        # box.length = float(line[8])
        # box.width = float(line[10])
        # box.height = float(line[9])
        box.heading = float(line[14])
        o.object.box.CopyFrom(box)
        # This must be within [0.0, 1.0]. It is better to filter those boxes with
        # small scores to speed up metrics computation.
        o.score = float(line[15])
        # if float(line[15])<1:
        #     o.score = float(line[15])
        # else:
        #     o.score = 0.5
        # if float(line[15])>=1:
        #     o.object.num_lidar_points_in_box = int(line[15])
        # For tracking, this must be set and it must be unique for each tracked
        # sequence.
        # o.object.id =
        # Use correct type.
        o.object.type = __type_list[line[0]]
        objects.objects.append(o)
    return objects
Example #20
def _create_pd_file_example():
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify the frame a prediction is made for.
    # Set them to exactly the values provided in the raw data; otherwise the prediction may be treated as incorrect.
    o.context_name = (
        'context_name for the prediction. See Frame::context::name '
        'in  dataset.proto.')
    # The frame timestamp for the prediction.
    invalid_ts = -1
    o.frame_timestamp_micros = invalid_ts
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.FRONT

    # Populate the box and score.
    box = label_pb2.Label.Box()
    box.center_x = 0
    box.center_y = 0
    box.center_z = 0
    box.length = 0
    box.width = 0
    box.height = 0
    box.heading = 0
    o.object.box.CopyFrom(box)
    # Must be within [0.0, 1.0]; filtering out boxes with small scores speeds up metrics computation.
    o.score = 0.5
    # For tracking, this must be set and must be unique for each tracked sequence.
    o.object.id = 'unique object tracking ID'
    # Use the correct type.
    o.object.type = label_pb2.Label.TYPE_PEDESTRIAN

    objects.objects.append(o)

    # A reasonable detector should limit the number of boxes per frame (around 400); a huge number of boxes slows down metrics computation.

    # Write objects to a file.
    f = open('/tmp/your_preds.bin', 'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #21
    def results2proto(self, results, outfile_prefix):
        if isinstance(results[0], list):
            dict_results = self._det2dicts(results)

        detections = metrics_pb2.Objects()

        for detection in dict_results:
            obj = metrics_pb2.Object()

            # fig = plt.figure()
            # img = mpimg.imread('data/waymococo_f0/val2020/'+detection['filename'])
            # plt.imshow(img)
            #
            # rect = patches.Rectangle((detection['center_x']-detection['length']/2, detection['center_y'] -detection['width']/2 )
            #                          , detection['length'], detection['width'], linewidth=1, edgecolor='r', facecolor='none')
            #
            # ax = plt.gca()
            # # Add the patch to the Axes
            # ax.add_patch(rect)
            # plt.show()

            lab = label_pb2.Label()
            lab.box.center_x = detection['center_x']
            lab.box.center_y = detection['center_y']
            lab.box.length = detection['length']
            lab.box.width = detection['width']
            lab.type = detection['type']

            obj.object.MergeFrom(lab)
            if detection['score']:
                obj.score = detection['score']

            obj.context_name = detection["context_name"]
            obj.frame_timestamp_micros = detection["timestamp_micros"]
            obj.camera_name = detection["camera_name"]

            detections.objects.append(obj)

        f = open(outfile_prefix + ".bin", 'wb')
        serialized = detections.SerializeToString()
        f.write(serialized)
        f.close()
Example #22
def _create_bbox_prediction(det, class_id, frame_name, marco_ts):
    o = metrics_pb2.Object()
    o.context_name = (frame_name)
    o.frame_timestamp_micros = marco_ts
    box = label_pb2.Label.Box()
    box.center_x = np.mean(det[[0, 2, 4, 6]])
    box.center_y = np.mean(det[[1, 3, 5, 7]])
    z0 = det[9]
    height = np.exp(det[10])
    box.center_z = z0 + height / 2
    box.width = np.sqrt((det[2] - det[4])**2 + (det[3] - det[5])**2)
    box.length = np.sqrt((det[2] - det[0])**2 + (det[3] - det[1])**2)
    box.height = height
    box.heading = det[8]
    o.object.box.CopyFrom(box)
    if len(det) == 12:
        o.score = det[11]
    o.object.id = ''
    o.object.type = class_id
    return o
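The layout of the det vector is only implied by the indexing above; the interpretation and the values below are assumptions meant to illustrate the call: det[0:8] holds four (x, y) box corners, det[8] the heading, det[9] the bottom z, det[10] the log of the box height, and det[11] an optional score.

import numpy as np
from waymo_open_dataset import label_pb2

det = np.array([
    9.0, 1.0,     # corner 0 (x, y)
    13.5, 1.0,    # corner 1 -> length side is corner 0 to corner 1
    13.5, 3.0,    # corner 2 -> width side is corner 1 to corner 2
    9.0, 3.0,     # corner 3
    0.0,          # heading
    0.2,          # z0, bottom of the box
    np.log(1.6),  # log of the box height
    0.85,         # score (only used when len(det) == 12)
])
o = _create_bbox_prediction(det, class_id=label_pb2.Label.TYPE_VEHICLE,
                            frame_name='<context_name>', marco_ts=0)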
Example #23
def get_objects(tf_root):
    objects = metrics_pb2.Objects()
    frame = open_dataset.Frame()
    files = glob.glob(os.path.join(tf_root, "*.tfrecord"))
    for file_name in tqdm(files[:]):
        dataset = tf.data.TFRecordDataset(file_name, compression_type='')
        for idx, data in enumerate(dataset):
            frame.ParseFromString(bytearray(data.numpy()))
            context_name = frame.context.name
            timestamp_micros = frame.timestamp_micros
            for camera_image, camera_label in zip_longest(
                    frame.images, frame.camera_labels):
                camera_name = open_dataset.CameraName.Name.Name(
                    camera_image.name)
                if camera_label is not None:
                    for label in camera_label.labels:
                        o = metrics_pb2.Object()
                        o.context_name = context_name
                        o.frame_timestamp_micros = timestamp_micros
                        o.camera_name = getattr(dataset_pb2.CameraName,
                                                camera_name)

                        box = label_pb2.Label.Box()
                        box.center_x = label.box.center_x
                        box.center_y = label.box.center_y
                        box.length = label.box.length
                        box.width = label.box.width
                        o.object.box.CopyFrom(box)
                        # This must be within [0.0, 1.0]. It is better to filter those boxes with
                        # small scores to speed up metrics computation.
                        o.score = 1.0
                        # For tracking, this must be set and it must be unique for each tracked
                        # sequence.
                        o.object.id = ''
                        # Use correct type.
                        o.object.type = label.type
                        objects.objects.append(o)
    return objects
Example #24
def create_prediction_obj(frame, pred_box, pred_score, pred_label, view,
                          orig_size):

    o = metrics_pb2.Object()
    o.context_name = frame.context.name
    o.frame_timestamp_micros = frame.timestamp_micros
    o.camera_name = view

    box = label_pb2.Label.Box()
    box.center_x = (pred_box[0] + pred_box[2]) / 2 * orig_size[1]
    box.center_y = (pred_box[1] + pred_box[3]) / 2 * orig_size[0]
    box.length = (pred_box[2] - pred_box[0]) * orig_size[1]
    box.width = (pred_box[3] - pred_box[1]) * orig_size[0]

    o.object.box.CopyFrom(box)

    # This must be within [0.0, 1.0]. It is better to filter those boxes with
    # small scores to speed up metrics computation.
    o.score = pred_score
    # Use correct type.
    o.object.type = pred_label + 1 if pred_label == 3 else pred_label

    return o
Example #25
def create_pd_object(detection, context_name, frame_timestamp_micros,
                     camera_name):
    """Creates a prediction objects file."""
    o = metrics_pb2.Object()
    # The following 3 fields are used to uniquely identify a frame a prediction
    # is predicted at. Make sure you set them to values exactly the same as what
    # we provided in the raw data. Otherwise your prediction is considered as a
    # false negative.
    o.context_name = context_name
    # The frame timestamp for the prediction. See Frame::timestamp_micros in
    # dataset.proto.
    # invalid_ts = -1
    o.frame_timestamp_micros = frame_timestamp_micros
    # This is only needed for 2D detection or tracking tasks.
    # Set it to the camera name the prediction is for.
    o.camera_name = dataset_pb2.CameraName.Name.Value(camera_name)

    bbox, score, label = detection['bbox'], detection['score'], detection[
        'category_id']

    # Populating box and score.
    box = label_pb2.Label.Box()
    box.center_x = bbox[0] + bbox[2] * 0.5
    box.center_y = bbox[1] + bbox[3] * 0.5
    box.length = bbox[2]
    box.width = bbox[3]
    o.object.box.CopyFrom(box)
    # This must be within [0.0, 1.0]. It is better to filter those boxes with
    # small scores to speed up metrics computation.
    o.score = score
    # For tracking, this must be set and it must be unique for each tracked sequence.
    if 'object_id' in detection:
        o.object.id = detection['object_id']
    # Use correct type.
    o.object.type = label
    assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
    return o
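A hedged call example for create_pd_object: the detection dict mirrors the COCO-style fields read above ([x, y, w, h] pixel bbox, score, category_id); every concrete value is a placeholder.

# Placeholder COCO-style detection; camera_name must be a string accepted by
# dataset_pb2.CameraName.Name.Value, e.g. 'FRONT'.
detection = {
    'bbox': [1190.0, 698.0, 113.5, 104.0],  # x, y, width, height in pixels
    'score': 0.87,
    'category_id': 1,                       # label_pb2.Label.TYPE_VEHICLE
    'object_id': 'veh_001',                 # only needed for tracking
}
o = create_pd_object(detection,
                     context_name='<context_name>',
                     frame_timestamp_micros=0,
                     camera_name='FRONT')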
Example #26
def _create_pd(args):
    objects = metrics_pb2.Objects()
    coco = COCO(args.anno_file)
    with open(args.results_file) as data_file:
        data = data_file.read()
        data_content = json.loads(data)
    results_per_id = []
    current_predictions = []
    imgIds = coco.getImgIds()
    current = imgIds[0]
    for pred in data_content:
        # Results are grouped per image id; flush finished images (possibly with no
        # predictions) until we reach the image this prediction belongs to.
        while pred['image_id'] != current:
            results_per_id.append(current_predictions)
            current_predictions = []
            current += 1
        bbox = pred['bbox']
        center_x = bbox[0] + (bbox[2] / 2)
        center_y = bbox[1] + (bbox[3] / 2)
        length = bbox[2]
        width = bbox[3]
        sub = [
            center_x, center_y, length, width, pred['score'],
            pred['category_id']
        ]
        current_predictions.append(sub)
    # Flush the last image and any trailing images without predictions.
    while len(results_per_id) < len(imgIds):
        results_per_id.append(current_predictions)
        current_predictions = []
    print(len(results_per_id))
    print(len(imgIds))
    print(imgIds[0])
    assert len(results_per_id) == len(imgIds)
    for idx in range(0, len(imgIds)):
        if (idx % 10000 == 0):
            print(idx)
        imgId = imgIds[idx]
        predictions = results_per_id[imgId - 1]
        o = metrics_pb2.Object()
        imgInfo = coco.loadImgs(imgId)
        o.context_name = imgInfo[0]['context']
        o.frame_timestamp_micros = imgInfo[0]['frame_timestamp_micros']
        o.camera_name = camera_name[imgInfo[0]['camera_name'] - 1]
        for pred in predictions:
            if pred[4] < 0.05:
                continue
            box = label_pb2.Label.Box()
            box.center_x = pred[0]
            box.center_y = pred[1]
            box.center_z = 0
            box.length = pred[2]
            box.width = pred[3]
            box.height = 0
            o.object.box.CopyFrom(box)
            o.score = pred[4]
            o.object.id = 'id'
            o.object.type = label_name[pred[5] - 1]
            objects.objects.append(o)
    f = open('valid.bin', 'wb')
    f.write(objects.SerializeToString())
    f.close()
Example #27
def generatevalidationsubmission(PATH, outputfilepath, MODEL_DIR):
    now = datetime.datetime.now()
    print("In generatevalidationsubmission, current date and time : ")
    print(now.strftime("%Y-%m-%d %H:%M:%S"))

    tf.enable_eager_execution()
    print(tf.__version__)
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name())
    device = torch.device("cuda")

    print("Loading Waymo validation frames...")
    waymovalidationframes = loadWaymoValidationFrames(PATH)
    #mywaymovaldataset = myNewWaymoDataset(PATH, waymovalidationframes, get_transform(train=False))
    print("Total validation frames: ", len(waymovalidationframes))

    num_classes = 4  #Unknown:0, Vehicles: 1, Pedestrians: 2, Cyclists: 3, Signs (removed)
    # get the model using our helper function
    print("Loading previous model: " + MODEL_DIR)
    #model = get_previous_object_detection_model(num_classes, previous_model_path)
    model = load_previous_object_detection_model(num_classes, MODEL_DIR)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    # move model to the right device
    model.to(device)
    model.eval()

    objects = metrics_pb2.Objects()
    outputallframes = waymovalidationframes  # or waymotestframes; the output file is opened after inference
    print("Total frames: ", len(outputallframes))
    #step=5
    for i in range(len(outputallframes)):  #len(outputallframes)
        if i % 10 == 0:
            print("current frame: ", i)
        frame = outputallframes[i]
        image = tf.image.decode_jpeg(
            frame.images[0].image).numpy()  #front camera image
        img = Image.fromarray(image)
        boxes, pred_cls, scores = get_prediction(model, img, device,
                                                 score_threshold)
        total_boxes = len(boxes)
        if len(boxes) == 0:
            continue
        for j in range(total_boxes):  # avoid shadowing the outer frame index i
            label = pred_cls[j]
            bbox = boxes[j]
            score = scores[j]
            o = metrics_pb2.Object()
            o.context_name = frame.context.name
            o.frame_timestamp_micros = int(frame.timestamp_micros)
            o.camera_name = dataset_pb2.CameraName.FRONT
            o.score = score

            # Populating box and score.
            box = label_pb2.Label.Box()
            box.length = bbox[1][0] - bbox[0][0]
            box.width = bbox[1][1] - bbox[0][1]
            box.center_x = bbox[0][0] + box.length * 0.5
            box.center_y = bbox[0][1] + box.width * 0.5

            o.object.box.CopyFrom(box)
            o.object.detection_difficulty_level = label_pb2.Label.LEVEL_1
            o.object.num_lidar_points_in_box = 100
            o.object.type = INSTANCE_pb2[
                label]  # INSTANCE_CATEGORY_NAMES.index(label) #INSTANCE_pb2[label]
            print(
                f'Object type label: {label}, {INSTANCE_pb2[label]}, {INSTANCE_CATEGORY_NAMES.index(label)}'
            )
            assert o.object.type != label_pb2.Label.TYPE_UNKNOWN
            objects.objects.append(o)

    submission = submission_pb2.Submission()
    submission.task = submission_pb2.Submission.DETECTION_2D
    submission.account_name = '*****@*****.**'
    submission.authors.append('Kaikai Liu')
    submission.affiliation = 'None'
    submission.unique_method_name = 'torchvisionfaster'
    submission.description = 'none'
    submission.method_link = "empty method"
    submission.sensor_type = submission_pb2.Submission.CAMERA_ALL
    submission.number_past_frames_exclude_current = 0
    submission.number_future_frames_exclude_current = 0
    submission.inference_results.CopyFrom(objects)
    f = open(outputfilepath, 'wb')
    #f = open("./drive/My Drive/waymo_submission/waymo35.bin", 'wb')
    f.write(submission.SerializeToString())
    f.close()

    now = datetime.datetime.now()
    print("Finished validation, current date and time : ")
    print(now.strftime("%Y-%m-%d %H:%M:%S"))
Example #28
def create_pd(frame, objmodel, device, score_threshold):
    """Creates a prediction objects file."""
    objects = metrics_pb2.Objects()

    image = tf.image.decode_jpeg(
        frame.images[0].image).numpy()  #front camera image
    img = Image.fromarray(image)

    #print(frame.camera_labels)#no labels
    #print(frame.context.name)#Refer to dataset.proto for the data format. The context contains shared information among all frames in the scene.
    #print(frame.timestamp_micros)

    #run the prediction
    boxes, pred_cls, scores = get_prediction(objmodel, img, device,
                                             score_threshold)
    #     print(pred_cls)
    #     print(boxes)
    boxnum = min(len(boxes), 400)
    for i in range(boxnum):  #patch in pred_bbox:
        patch = boxes[i]
        label = pred_cls[i]
        #print(patch)#[(827.3006, 617.69965), (917.02795, 656.8029)]

        # One Object proto per detection; see
        # https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/protos/metrics.proto
        o = metrics_pb2.Object()
        # The following 3 fields are used to uniquely identify a frame a prediction
        # is predicted at. Make sure you set them to values exactly the same as what
        # we provided in the raw data. Otherwise your prediction is considered as a
        # false negative.
        o.context_name = frame.context.name  # See Frame::context::name in dataset.proto.
        # The frame timestamp for the prediction. See Frame::timestamp_micros in
        # dataset.proto.
        o.frame_timestamp_micros = int(frame.timestamp_micros)
        # This is only needed for 2D detection or tracking tasks.
        # Set it to the camera name the prediction is for.
        o.camera_name = dataset_pb2.CameraName.FRONT

        # Populating box and score.
        # Bounding box; see
        # https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/label.proto
        box = label_pb2.Label.Box()
        width = patch[1][0] - patch[0][0]
        height = patch[1][1] - patch[0][1]
        box.center_x = patch[0][0] + width / 2
        box.center_y = patch[0][1] + height / 2
        box.center_z = 0
        box.length = 0
        box.width = width
        box.height = height
        box.heading = 0

        o.object.box.CopyFrom(box)  #o.object: Label type
        # This must be within [0.0, 1.0]. It is better to filter those boxes with
        # small scores to speed up metrics computation.
        o.score = 0.5
        # For tracking, this must be set and it must be unique for each tracked
        # sequence.
        o.object.id = 'xxx'  #'unique object tracking ID'
        # Use correct type.
        o.object.type = INSTANCE_pb2[label]  #label_pb2.Label.TYPE_PEDESTRIAN
        #print(o)
        objects.objects.append(o)

    return objects
Example #29
def make_allcameraobject_list_from_subdir(np_dir, frame_context_name,
                                          frame_timestamp_micros):
    # For all cameras, the individual boxes.npy, classes.npy, and scores.npy were merged
    # into a single allcameraresult.npy, keyed by camera name with a result dict as the value.
    # boxes = np.load(os.path.join(np_dir, 'boxes.npy'))
    # classes = np.load(os.path.join(np_dir, 'classes.npy'))
    # scores = np.load(os.path.join(np_dir, 'scores.npy'))
    allcameraresult = np.load(os.path.join(np_dir, 'allcameraresult.npy'),
                              allow_pickle=True)
    allcameraresult = allcameraresult.item()
    #print(type(allcameraresult))

    obj_list = []
    for imagename in allcameras:  #go through all cameras

        resultdict = allcameraresult[imagename]  #one camera
        boxes = resultdict['boxes']
        classes = resultdict['classes']
        scores = resultdict['scores']

        # Read the input fields file if it exists.
        # input_fields = []
        # input_field_path = os.path.join(np_dir, 'input_fields.txt')
        # if os.path.isfile(input_field_path):
        #     with open(input_field_path, 'r') as input_field_file:
        #         input_fields = input_field_file.readlines()
        input_fields = [imagename]  # e.g. ["FRONT_IMAGE"]; input_fields.txt is empty

        num_objs = boxes.shape[0]
        assert classes.shape[0] == num_objs
        assert scores.shape[0] == num_objs

        for i in range(num_objs):
            obj = metrics_pb2.Object()
            obj.context_name = frame_context_name
            obj.frame_timestamp_micros = frame_timestamp_micros
            obj.score = scores[i]
            obj.object.type = classes[i]

            # Handle the box creation differently for 3D boxes (where the inner
            # dimension is 7) and 2D boxes (where the inner dimension is 4).
            if boxes.shape[1] == 7:
                obj.object.box.center_x = boxes[i, 0]
                obj.object.box.center_y = boxes[i, 1]
                obj.object.box.center_z = boxes[i, 2]
                obj.object.box.length = boxes[i, 3]
                obj.object.box.width = boxes[i, 4]
                obj.object.box.height = boxes[i, 5]
                obj.object.box.heading = boxes[i, 6]
            elif boxes.shape[1] == 4:
                obj.object.box.center_x = boxes[i, 0]
                obj.object.box.center_y = boxes[i, 1]
                obj.object.box.length = boxes[i, 2]
                obj.object.box.width = boxes[i, 3]

                # For 2D detection objects, the camera name of the object proto comes from
                # the camera whose image was used as input. Thus, the input_fields
                # specified by the user are checked to ensure that they only used a single
                # input and that the input was the RGB image from one of the cameras.
                #print("input fields:", input_fields)
                if len(input_fields) != 1:
                    raise ValueError(
                        'Can only use one input when submitting 2D detection '
                        'results; instead was using:\n' +
                        '\n'.join(input_fields))

                input_field = input_fields[0]
                if not input_field.endswith('_IMAGE'):
                    raise ValueError(
                        'For 2D detection results, the input field should be '
                        'one of the camera images, but got ' + input_field)
                obj.camera_name = dataset_pb2.CameraName.Name.Value(
                    input_field[:-6])  #remove _IMAGE
                print(
                    f'obj camera name: {obj.camera_name}, input_field[:-6]: {input_field[:-6]}'
                )  #obj camera name: 1, input_field[:-6]: FRONT

            # Run some checks to avoid adding invalid objects. These are the same checks
            # used in metrics/tools/create_submission.cc
            if (obj.score < 0.03 or obj.object.box.length < 0.01
                    or obj.object.box.width < 0.01
                    or (obj.object.box.HasField('height')
                        and obj.object.box.height < 0.01)):
                print('Skipping invalid object', obj)
                continue

            obj_list.append(obj)

    return obj_list
Example #30
def make_object_list_from_subdir(np_dir, frame_context_name,
                                 frame_timestamp_micros):
    """Make a list of Object protos from the detection results in a directory.
    In particular, this function assumes that np_dir is a subdirectory like one
    created by the latency evaluator for a particular frame, and thus that it
    contains three npy files:
    * boxes.npy: a N x 7 float array with the x, y, z, length, width, height, and
                 heading for all the detections in this frame.
    * classes.npy: a N-dim uint8 array with the type IDs in {0, 1, 2, 3, 4} for
                   all the detections in this frame.
    * scores.npy: a N-dim float array with the scores in [0, 1] for all the
                  detections in this frame.
    These arrays are converted into a list of N Object protos, one for each
    detection, where all the protos have the frame_context_name and
    frame_timestamp_micros set by the arguments.
    Args:
      np_dir: string directory name containing the npy files.
      frame_context_name: string context_name to set for each Object proto.
      frame_timestamp_micros: int timestamp micros to set for each Object proto.
    Returns:
      List of N Object protos, one for each detection present in the npy files.
      They all have the same context name and frame_timestamp_micros, while their
      boxes, scores, and types come from the numpy arrays.
    """
    boxes = np.load(os.path.join(np_dir, 'boxes.npy'))
    classes = np.load(os.path.join(np_dir, 'classes.npy'))
    scores = np.load(os.path.join(np_dir, 'scores.npy'))

    # Read the input fields file if it exists.
    input_fields = []
    input_field_path = os.path.join(np_dir, 'input_fields.txt')
    if os.path.isfile(input_field_path):
        with open(input_field_path, 'r') as input_field_file:
            input_fields = input_field_file.readlines()

    num_objs = boxes.shape[0]
    assert classes.shape[0] == num_objs
    assert scores.shape[0] == num_objs

    obj_list = []
    for i in range(num_objs):
        obj = metrics_pb2.Object()
        obj.context_name = frame_context_name
        obj.frame_timestamp_micros = frame_timestamp_micros
        obj.score = scores[i]
        obj.object.type = classes[i]

        # Handle the box creation differently for 3D boxes (where the inner
        # dimension is 7) and 2D boxes (where the inner dimension is 4).
        if boxes.shape[1] == 7:
            obj.object.box.center_x = boxes[i, 0]
            obj.object.box.center_y = boxes[i, 1]
            obj.object.box.center_z = boxes[i, 2]
            obj.object.box.length = boxes[i, 3]
            obj.object.box.width = boxes[i, 4]
            obj.object.box.height = boxes[i, 5]
            obj.object.box.heading = boxes[i, 6]
        elif boxes.shape[1] == 4:
            obj.object.box.center_x = boxes[i, 0]
            obj.object.box.center_y = boxes[i, 1]
            obj.object.box.length = boxes[i, 2]
            obj.object.box.width = boxes[i, 3]

            # For 2D detection objects, the camera name of the object proto comes from
            # the camera whose image was used as input. Thus, the input_fields
            # specified by the user are checked to ensure that they only used a single
            # input and that the input was the RGB image from one of the cameras.
            if len(input_fields) != 1:
                raise ValueError(
                    'Can only use one input when submitting 2D detection '
                    'results; instead was using:\n' + '\n'.join(input_fields))
            input_field = input_fields[0]
            if not input_field.endswith('_IMAGE'):
                raise ValueError(
                    'For 2D detection results, the input field should be '
                    'one of the camera images, but got ' + input_field)
            obj.camera_name = dataset_pb2.CameraName.Name.Value(
                input_field[:-6])

        # Run some checks to avoid adding invalid objects. These are the same checks
        # used in metrics/tools/create_submission.cc
        if (obj.score < 0.03 or obj.object.box.length < 0.01
                or obj.object.box.width < 0.01
                or (obj.object.box.HasField('height')
                    and obj.object.box.height < 0.01)):
            print('Skipping invalid object', obj)
            continue

        obj_list.append(obj)

    return obj_list
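A hedged sketch of how the per-frame lists from make_object_list_from_subdir might be merged into a single submission file. How each frame's context name and timestamp are recovered is an assumption here (a plain-text manifest with one "subdir context_name timestamp_micros" line per frame); the original snippet does not specify it.

import os
from waymo_open_dataset.protos import metrics_pb2


def merge_frame_subdirs(root_dir, manifest_path, out_path):
    # Each manifest line names a frame subdirectory plus its frame identifiers (assumed format).
    objects = metrics_pb2.Objects()
    with open(manifest_path) as manifest:
        for line in manifest:
            subdir, context_name, timestamp_micros = line.split()
            obj_list = make_object_list_from_subdir(
                os.path.join(root_dir, subdir), context_name, int(timestamp_micros))
            objects.objects.extend(obj_list)
    with open(out_path, 'wb') as f:
        f.write(objects.SerializeToString())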