Example #1
    def process(self, raw_data, input_data, ground_truth, piped_params=None):
        # Add input_data
        img_t0 = cv2.imdecode(np.frombuffer(raw_data["img"], np.uint8), cv2.IMREAD_COLOR)
        img_t0, _ = resize_img(img_t0, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT, offset_bottom=self.params.OFFSET_BOTTOM)

        # Add ground_truth mask
        mask_t1 = cv2.imdecode(np.frombuffer(raw_data["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
        mask_t1, _ = resize_img(mask_t1, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT, offset_bottom=self.params.OFFSET_BOTTOM, interpolation=cv2.INTER_NEAREST)

        # augment
        if "replay_img_aug" in piped_params and len(piped_params["replay_img_aug"]) > 0:
            for replay in piped_params["replay_img_aug"]:
                transformed = A.ReplayCompose.replay(replay, image=img_t0, mask=mask_t1)
                img_t0 = transformed["image"]
                mask_t1 = transformed["mask"]
        elif self.do_augmentation:
            img_t0, mask_t1 = self.augment(img_t0, mask_t1)
        img_t0 = img_t0.astype(np.float32)

        mask_t1, _ = resize_img(mask_t1, self.params.MASK_WIDTH, self.params.MASK_HEIGHT, offset_bottom=self.params.OFFSET_BOTTOM, interpolation=cv2.INTER_NEAREST)

        # adjust mask values
        mask_t1 = mask_t1.astype(np.float32)
        mask_t1 /= 255.0
        pos_mask = np.where(mask_t1 > 0.0, 1.0, 0.0) 
        mask_t1 = np.clip(mask_t1, 4.1, 130.0)
        mask_t1 = 22 * np.sqrt(mask_t1 - 4)
        mask_t1 *= pos_mask

        input_data = img_t0
        ground_truth = mask_t1

        return raw_data, input_data, ground_truth, piped_params
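The 22 * np.sqrt(mask_t1 - 4) step above looks like a compression of metric depth (clipped to roughly 4-130 m) into the label range used for training; a minimal sketch of the inverse mapping, assuming that interpretation (the helper name decode_depth is hypothetical):

import numpy as np

def decode_depth(encoded, pos_mask):
    # Invert the assumed encoding enc = 22 * sqrt(depth - 4); invalid pixels stay 0.
    depth = (encoded / 22.0) ** 2 + 4.0
    return depth * pos_mask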
Example #2
    def process(self, raw_data, input_data, ground_truth, piped_params=None):
        # Add input_data
        img_t0 = cv2.imdecode(np.frombuffer(raw_data[0]["img"], np.uint8),
                              cv2.IMREAD_COLOR)
        img_t0, _ = resize_img(img_t0,
                               self.params.INPUT_WIDTH,
                               self.params.INPUT_HEIGHT,
                               offset_bottom=self.params.OFFSET_BOTTOM)

        img_t1 = cv2.imdecode(np.frombuffer(raw_data[1]["img"], np.uint8),
                              cv2.IMREAD_COLOR)
        img_t1, _ = resize_img(img_t1,
                               self.params.INPUT_WIDTH,
                               self.params.INPUT_HEIGHT,
                               offset_bottom=self.params.OFFSET_BOTTOM)

        # img_t0, img_t1 = self.augment(img_t0, img_t1)
        img_t0 = img_t0.astype(np.float32)
        img_t1 = img_t1.astype(np.float32)

        # Add ground_truth mask
        mask_t0 = cv2.imdecode(np.frombuffer(raw_data[0]["depth"], np.uint8),
                               cv2.IMREAD_ANYDEPTH)
        mask_t0, _ = resize_img(mask_t0,
                                self.params.INPUT_WIDTH,
                                self.params.INPUT_HEIGHT,
                                offset_bottom=self.params.OFFSET_BOTTOM,
                                interpolation=cv2.INTER_NEAREST)
        mask_t0 = mask_t0.astype(np.float32)
        mask_t0 /= 255.0
        mask_t0 = np.expand_dims(mask_t0, axis=-1)

        mask_t1 = cv2.imdecode(np.frombuffer(raw_data[1]["depth"], np.uint8),
                               cv2.IMREAD_ANYDEPTH)
        mask_t1, _ = resize_img(mask_t1,
                                self.params.INPUT_WIDTH,
                                self.params.INPUT_HEIGHT,
                                offset_bottom=self.params.OFFSET_BOTTOM,
                                interpolation=cv2.INTER_NEAREST)
        mask_t1 = mask_t1.astype(np.float32)
        mask_t1 /= 255.0
        mask_t1 = np.expand_dims(mask_t1, axis=-1)

        # currently hardcoded for driving stereo dataset
        intr_scale = (self.params.INPUT_WIDTH / 1758.0)
        intr = np.array(
            [[2063.0 * intr_scale, 0.0, 978.0 * intr_scale],
             [0.0, 2063.0 * intr_scale, 584.0 * intr_scale], [0.0, 0.0, 1.0]],
            dtype=np.float32)

        input_data = [img_t0, img_t1, intr]
        ground_truth = [mask_t0, mask_t1]

        return raw_data, input_data, ground_truth, piped_params
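The intrinsic matrix above hardcodes DrivingStereo calibration values and scales fx, fy, cx and cy by the same resize factor; the same idea expressed as a generic helper (scale_intrinsics is only an illustrative rewrite, not part of the project):

import numpy as np

def scale_intrinsics(fx, fy, cx, cy, scale):
    # Resizing an image by `scale` scales the focal lengths and the principal point alike.
    return np.array([[fx * scale, 0.0, cx * scale],
                     [0.0, fy * scale, cy * scale],
                     [0.0, 0.0, 1.0]], dtype=np.float32)

# Equivalent to the hardcoded matrix above:
# intr = scale_intrinsics(2063.0, 2063.0, 978.0, 584.0, self.params.INPUT_WIDTH / 1758.0)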
Example #3
    def test_resize_img(self, org_width, org_height, goal_width, goal_height,
                        offset_bottom):
        img = np.zeros((org_height, org_width))
        resized_img, roi = resize_img(img, goal_width, goal_height,
                                      offset_bottom)
        assert resized_img.shape[0] == goal_height
        assert resized_img.shape[1] == goal_width
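The test takes its sizes as arguments, so it is presumably driven by a parametrize decorator that the snippet omits; a minimal sketch of the wiring, assuming pytest and that resize_img is importable from the project (the concrete size tuples are made up):

import numpy as np
import pytest

@pytest.mark.parametrize(
    "org_width, org_height, goal_width, goal_height, offset_bottom",
    [(1280, 720, 640, 256, 0), (1920, 1080, 640, 256, 60)],
)
def test_resize_img(org_width, org_height, goal_width, goal_height, offset_bottom):
    img = np.zeros((org_height, org_width))
    resized_img, _ = resize_img(img, goal_width, goal_height, offset_bottom)
    assert resized_img.shape[:2] == (goal_height, goal_width)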
Example #4
def create_dataset(input_shape):
    print("Resize Input to: " + str(input_shape))
    dataset = []
    
    # Create sample dataset for post training quantization
    client = MongoClient("mongodb://localhost:27017")
    collection = client["labels"]["nuscenes_train"]
    documents = collection.find({}).limit(600).skip(50)

    documents_list = list(documents)
    assert len(documents_list) > 0
    for i in range(len(documents_list)):
        decoded_img_t0 = np.frombuffer(documents_list[i]["img"], np.uint8)
        img_t0 = cv2.imdecode(decoded_img_t0, cv2.IMREAD_COLOR)
        img_t0, _ = resize_img(img_t0, input_shape[2], input_shape[1], offset_bottom=0)
        dataset.append(np.array([img_t0], dtype=np.float32))

    return dataset
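The comment mentions post-training quantization: the list that create_dataset returns would typically feed the TFLite converter's representative dataset. A minimal sketch, assuming a loaded Keras model object named model and an illustrative input shape:

import tensorflow as tf

def representative_dataset():
    # Yield one calibration sample per step, as the converter expects.
    for sample in create_dataset((1, 256, 640, 3)):  # input shape is illustrative
        yield [sample]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_model = converter.convert()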
Example #5
    # cap = cv2.VideoCapture('/path/to/video.mp4')
    # cap.set(cv2.CAP_PROP_POS_MSEC, 0)
    # while (cap.isOpened()):
    #     ret, img = cap.read()

    documents = collection.find({})
    for doc in documents:
        decoded_img = np.frombuffer(doc["img"], np.uint8)
        img = cv2.imdecode(decoded_img, cv2.IMREAD_COLOR)

        gt_mask = None
        if doc["mask"] is not None:
            decoded_mask = np.frombuffer(doc["mask"], np.uint8)
            gt_mask = cv2.imdecode(decoded_mask, cv2.IMREAD_COLOR)

        img, roi = resize_img(img, args.img_width, args.img_height,
                              args.offset_bottom)

        if is_tf_lite:
            img_input = img
            interpreter.set_tensor(input_details[0]['index'], [img_input])
            #input_shape = input_details[0]['shape']
            start_time = time.time()
            interpreter.invoke()
            elapsed_time = time.time() - start_time
            # The function `get_tensor()` returns a copy of the tensor data. Use `tensor()` in order to get a pointer to the tensor.
            output_data = interpreter.get_tensor(output_details[0]['index'])
            semseg_img = to_3channel(output_data[0].astype(np.float64),
                                     List(SEMSEG_CLASS_MAPPING.items()))
        else:
            img_arr = np.array([img])
            start_time = time.time()
Example #6
def main(args):
    args.path = "/home/jo/training_data/nuscenes/nuscenes-v1.0"
    args.resize = [640, 256, 0]

    client = MongoClient(args.conn)
    collection = client[args.db][args.collection]

    nusc = NuScenes(version="v1.0-mini", dataroot=args.path, verbose=True)
    nusc.list_scenes()

    for scene in tqdm(nusc.scene):
        next_sample_token = scene["first_sample_token"]

        while True:
            sample = nusc.get('sample', next_sample_token)
            next_sample_token = sample["next"]
            
            sample_data_token = sample["data"]["CAM_FRONT"]
            sample_data = nusc.get_sample_data(sample_data_token)
            cam_front_data = nusc.get('sample_data', sample_data_token)

            # Create image data
            img_path = sample_data[0]
            if os.path.exists(img_path):
                img = cv2.imread(img_path)
                depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='LIDAR_TOP', camera_channel='CAM_FRONT')
                # Not sure about radar data; it often does not align well with the lidar data, leaving it out for now
                # depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='RADAR_FRONT', camera_channel='CAM_FRONT', depth_map=depth_map)
                roi = Roi()
                if args.resize:
                    img, roi = resize_img(img, args.resize[0], args.resize[1], args.resize[2])
                    depth_map, _ = resize_img(depth_map, args.resize[0], args.resize[1], args.resize[2], cv2.INTER_NEAREST)
                img_bytes = cv2.imencode('.jpeg', img)[1].tobytes()
                depth_bytes = cv2.imencode(".png", depth_map)[1].tobytes()
                content_type = "image/jpeg"
            else:
                print("WARNING: file not found: " + img_path + ", continue with next image")
                continue

            # Get sensor extrinsics. Not sure why roll and yaw seem to be PI/2 off compared to the nuImages calibrated sensor
            sensor = nusc.get('calibrated_sensor', cam_front_data['calibrated_sensor_token'])
            q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1], sensor["rotation"][2], sensor["rotation"][3])
            roll  = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) 
            yaw   = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(sensor["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")

            # Sensor calibration is static, pose would be dynamic. TODO: Somehow also add some sort of cam to cam motion to be learned
            # ego_pose = nusc.get("ego_pose", cam_front_data["ego_pose_token"])
            # q = MyQuaternion(ego_pose["rotation"][0], ego_pose["rotation"][1], ego_pose["rotation"][2], ego_pose["rotation"][3])
            # roll  = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            # pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) 
            # yaw   = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(ego_pose["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")

            entry = Entry(
                img=img_bytes,
                content_type=content_type,
                depth=depth_bytes,
                org_source="nuscenes",
                org_id=img_path,
                objects=[],
                ignore=[],
                has_3D_info=True,
                has_track_info=True,
                sensor_valid=True,
                yaw=wrap_angle(yaw),
                roll=wrap_angle(roll),
                pitch=wrap_angle(pitch),
                translation=sensor["translation"],
                scene_token=sample["scene_token"],
                timestamp=sample["timestamp"]
            )

            labels = sample_data[1]
            idx_counter = 0
            for box in labels:
                if box.name in CLASS_MAP.keys():
                    box.translate(np.array([0, box.wlh[2] / 2, 0]))  # translate from box center to bottom center
                    pos_3d = np.array([*box.center, 1.0])
                    cam_mat = np.hstack((sample_data[2], np.zeros((3, 1))))
                    cam_mat[0][2] += roi.offset_left
                    cam_mat[1][2] += roi.offset_top
                    cam_mat *= roi.scale
                    cam_mat[2][2] = 1.0
                    # create rotation matrix; using the rotation matrix directly from nuscenes somehow does not work,
                    # so instead we compute the rotation the same way as in KITTI and reuse that code
                    v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                    yaw = -np.arctan2(v[2], v[0])
                    rot_angle = wrap_angle(float(yaw) + math.pi * 0.5) # because parallel to optical view of camera = 90 deg
                    rot_mat = np.array([
                        [math.cos(rot_angle), 0.0, math.sin(rot_angle), 0.0],
                        [0.0, 1.0, 0.0, 0.0],
                        [-math.sin(rot_angle), 0.0, math.cos(rot_angle), 0.0],
                        [0.0, 0.0, 0.0, 1.0],
                    ])
                    pos_2d = np.matmul(cam_mat, pos_3d)
                    pos_2d /= pos_2d[2]
                    box3d = calc_cuboid_from_3d(pos_3d, cam_mat, rot_mat, box.wlh[0], box.wlh[2], box.wlh[1])
                    box2d = bbox_from_cuboid(box3d)

                    annotation = nusc.get("sample_annotation", box.token)
                    instance_token = annotation["instance_token"]

                    entry.objects.append(Object(
                        obj_class=CLASS_MAP[box.name],
                        box2d=box2d,
                        box3d=box3d,
                        box3d_valid=True,
                        instance_token=instance_token,
                        truncated=None,
                        occluded=None,
                        width=box.wlh[0],
                        length=box.wlh[1],
                        height=box.wlh[2],
                        orientation=rot_angle,
                        # converted to autosar coordinate system
                        x=pos_3d[2],
                        y=-pos_3d[0],
                        z=-pos_3d[1]
                    ))

                    # For debugging show the data in the image
                    box2d = list(map(int, box2d))
                    cv2.circle(img, (int(pos_2d[0]), int(pos_2d[1])), 3, (255, 0, 0))
                    cv2.rectangle(img, (box2d[0], box2d[1]), (box2d[0] + box2d[2], box2d[1] + box2d[3]), (255, 255, 0), 1)
                    cv2.putText(img,f"{idx_counter}", (box2d[0], box2d[1] + int(box2d[3])), 0, 0.4, (255, 255, 255))
                    idx_counter += 1
                    print(f"{idx_counter}: {math.degrees(rot_angle)}")

            f, (ax1, ax2) = plt.subplots(2, 1)
            ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            max_val = np.amax(depth_map)
            ax2.imshow(depth_map, cmap="gray", vmin=1, vmax=10000)
            plt.show()

            # collection.insert_one(entry.get_dict())

            if next_sample_token == "":
                break
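wrap_angle is used on the extrinsic angles above but is not shown; a plausible minimal implementation that wraps an angle into (-pi, pi], offered only as an assumption about its behaviour:

import math

def wrap_angle(angle):
    # Wrap into (-pi, pi]; assumed behaviour, not necessarily the original implementation.
    wrapped = math.fmod(angle + math.pi, 2.0 * math.pi)
    if wrapped <= 0.0:
        wrapped += 2.0 * math.pi
    return wrapped - math.pi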
Example #7
        curr_scene_depth_map_root = os.path.join(args.depth_map, folder)
        curr_scene_image_root = os.path.join(args.images, folder)
        _, _, filenames = next(os.walk(curr_scene_depth_map_root))
        filenames.sort() # since we have video frames
        timestamp = 0
        next_timestamp = 1
        for filename in tqdm(filenames):
            depth_file = os.path.join(curr_scene_depth_map_root, filename)
            img_file = os.path.join(curr_scene_image_root, filename[:-3] + "jpg")
            if os.path.isfile(img_file):
                img_data = cv2.imread(img_file)
                depth_data = cv2.imread(depth_file, -1) # load as is (uint16 grayscale img)
                # depth_data_org = depth_data

                if args.resize is not None:
                    depth_data, _ = resize_img(depth_data, args.resize[0], args.resize[1], args.resize[2], interpolation=cv2.INTER_NEAREST)
                    img_data, _ = resize_img(img_data, args.resize[0], args.resize[1], args.resize[2])

                img_bytes = cv2.imencode(".jpg", img_data)[1].tobytes()
                depth_bytes = cv2.imencode(".png", depth_data)[1].tobytes()

                entry = Entry(
                    img=img_bytes,
                    depth=depth_bytes,
                    content_type="image/jpg",
                    org_source="driving_stereo",
                    org_id=filename[:-3],
                    scene_token=folder,
                    timestamp=timestamp,
                    next_timestamp=next_timestamp
                )
Example #8
def test():
    params = DmdsParams()
    loss = DmdsLoss(params)
    batch_size = 2

    client = MongoClient("mongodb://localhost:27017")
    collection = client["depth"]["driving_stereo"]
    documents = collection.find({}).limit(10).skip(300)
    documents = list(documents)
    for i in range(0, len(documents) - 1):
        intr = np.array(
            [[375.0, 0.0, 160.0], [0.0, 375.0, 128.0], [0.0, 0.0, 1.0]],
            dtype=np.float32)
        intr = np.stack([intr] * batch_size)

        img0 = cv2.imdecode(np.frombuffer(documents[i]["img"], np.uint8),
                            cv2.IMREAD_COLOR)
        img0, _ = resize_img(img0, 320, 128, 0)
        img0 = np.stack([img0] * batch_size, axis=0)
        img0 = img0.astype(np.float32)

        img1 = cv2.imdecode(np.frombuffer(documents[i + 1]["img"], np.uint8),
                            cv2.IMREAD_COLOR)
        img1, _ = resize_img(img1, 320, 128, 0)
        img1 = np.stack([img1] * batch_size, axis=0)
        img1 = img1.astype(np.float32)

        # create gt depth_maps
        x0 = cv2.imdecode(np.frombuffer(documents[i]["depth"], np.uint8),
                          cv2.IMREAD_ANYDEPTH)
        x0, _ = resize_img(x0, 320, 128, 0, interpolation=cv2.INTER_NEAREST)
        x0 = np.expand_dims(x0, axis=-1)
        x0 = np.stack([x0] * batch_size, axis=0)
        x0 = x0.astype(np.float32)
        x0 /= 255.0

        x1 = cv2.imdecode(np.frombuffer(documents[i + 1]["depth"], np.uint8),
                          cv2.IMREAD_ANYDEPTH)
        x1, _ = resize_img(x1, 320, 128, 0, interpolation=cv2.INTER_NEAREST)
        x1 = np.expand_dims(x1, axis=-1)
        x1 = np.stack([x1] * batch_size, axis=0)
        x1 = x1.astype(np.float32)
        x1 /= 255.0

        mm = np.zeros((*img0.shape[:-1], 3), dtype=np.float32)
        mm[:, 23:85, 132:320, :] = [0.0, 0.0, 0.0]

        mm_inv = np.zeros((*img0.shape[:-1], 3), dtype=np.float32)
        mm_inv[:, 23:85, 172:320, :] = [0.0, 0.0, 0.0]

        rot = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        rot[:, 0, 0, :] = np.array([-0.1, 0.0, 0.1])
        rot_inv = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        rot_inv[:, 0, 0, :] = np.array([0.1, 0.0, -0.1])

        tran = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        # [left/right | up/down | forward/backward]
        tran[:, 0, 0, :] = np.array([0.0, 0.0, 10.0])
        tran_inv = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        tran_inv[:, 0, 0, :] = np.array([0.0, 0.0, -10])

        loss.calc(img1, img0, x1, x0, mm_inv, mm, tran_inv, tran, rot_inv, rot,
                  intr, x0, x1, 0)

        for key in loss.loss_vals.keys():
            print(f"{key}: {loss.loss_vals[key].numpy()}")
        print(" - - - - - - - -")

        for b in range(len(img0)):
            f, ((ax11, ax12), (ax21, ax22), (ax31, ax32)) = plt.subplots(3, 2)
            # img0, img1
            ax11.imshow(
                cv2.cvtColor((img0[b]).astype(np.uint8), cv2.COLOR_BGR2RGB))
            ax12.imshow(
                cv2.cvtColor((img1[b]).astype(np.uint8), cv2.COLOR_BGR2RGB))
            # gt depth x1, motion map mm
            ax21.imshow(x1[b], cmap='gray', vmin=0, vmax=170)
            ax22.imshow((mm[b] * (255.0 / np.amax(mm[b]))).astype(np.uint8))
            # warp mask, resampled img1
            ax31.imshow(loss.warp_mask[b], cmap='gray', vmin=0, vmax=1)
            ax32.imshow(
                cv2.cvtColor((loss.resampled_img1[b].numpy()).astype(np.uint8),
                             cv2.COLOR_BGR2RGB))
            plt.show()
Example #9
        model: tf.keras.models.Model = tf.keras.models.load_model(
            args.model_path, compile=False)
        model.summary()
        print("Using Tensorflow")

    # alternative data source, mp4 video
    # cap = cv2.VideoCapture('/home/jo/training_data/speedchallenge/data/train.mp4')
    # while (cap.isOpened()):
    #     ret, img = cap.read()
    documents = collection.find({}).limit(20)
    for doc in documents:
        decoded_img = np.frombuffer(doc["img"], np.uint8)
        img = cv2.imdecode(decoded_img, cv2.IMREAD_COLOR)

        input_img, roi = resize_img(img,
                                    input_shape[2],
                                    input_shape[1],
                                    offset_bottom=args.offset_bottom)

        if is_tf_lite:
            input_img = input_img.astype(np.float32)
            interpreter.set_tensor(input_details[0]['index'], [input_img])
            start_time = time.time()
            interpreter.invoke()
            elapsed_time = time.time() - start_time
            # The function `get_tensor()` returns a copy of the tensor data. Use `tensor()` in order to get a pointer to the tensor.
            output_data = interpreter.get_tensor(output_details[0]['index'])
            output_mask = output_data[0]
        else:
            start_time = time.time()
            raw_result = model.predict(np.array([input_img]))
            elapsed_time = time.time() - start_time
Example #10
                args.model_path, custom_objects, compile=False)
        model.summary()
        print("Using Tensorflow")

    # alternative data source, mp4 video
    # cap = cv2.VideoCapture('/path/to/video.mp4')
    # cap.set(cv2.CAP_PROP_POS_MSEC, 0)
    # while (cap.isOpened()):
    #     ret, img = cap.read()

    documents = collection.find({}).limit(1000)
    documents = list(documents)
    for i in range(0, len(documents) - 1):
        decoded_img_t0 = np.frombuffer(documents[i]["img"], np.uint8)
        img_t0 = cv2.imdecode(decoded_img_t0, cv2.IMREAD_COLOR)
        img_t0, _ = resize_img(img_t0, args.img_width, args.img_height,
                               args.offset_bottom)

        decoded_img_t1 = np.frombuffer(documents[i + 1]["img"], np.uint8)
        img_t1 = cv2.imdecode(decoded_img_t1, cv2.IMREAD_COLOR)
        img_t1, _ = resize_img(img_t1, args.img_width, args.img_height,
                               args.offset_bottom)

        intr = np.array(
            [[375.0, 0.0, 160.0], [0.0, 375.0, 128.0], [0.0, 0.0, 1.0]],
            dtype=np.float32)

        if is_tf_lite:
            interpreter.set_tensor(input_details[0]['index'], [img_t0])
            interpreter.set_tensor(input_details[1]['index'], [img_t1])
            #input_shape = input_details[0]['shape']
            start_time = time.time()
Example #11
def main(args):
    args.path = "/home/jo/training_data/nuscenes/nuimages-v1.0"
    args.resize = [640, 256, 0]

    client = MongoClient(args.conn)
    collection = client[args.db][args.collection]

    nuim = NuImages(version="v1.0-val",
                    dataroot=args.path,
                    lazy=True,
                    verbose=False)

    for sample in tqdm(nuim.sample):
        sample_data = nuim.get("sample_data", sample["key_camera_token"])

        # check if already exists, if yes, continue with next
        # check_db_entry = collection.find_one({ "org_source": "nuscenes", "org_id": sample_data["filename"]})
        # if check_db_entry is not None:
        #     print("WARNING: Entry " + str(sample_data["filename"]) + " already exists, continue with next image")
        #     continue

        # Create image data
        img_path = args.path + "/" + sample_data["filename"]
        roi = Roi()
        if os.path.exists(img_path):
            if "CAM_FRONT/" in img_path:
                img = cv2.imread(img_path)
                if args.resize is not None:
                    img, roi = resize_img(img, args.resize[0], args.resize[1],
                                          args.resize[2])
                img_bytes = cv2.imencode('.jpeg', img)[1].tobytes()
                content_type = "image/jpeg"
            else:
                continue
        else:
            print("WARNING: file not found: " + img_path +
                  ", continue with next image")
            continue

        # Get sensor extrinsics
        sensor = nuim.get("calibrated_sensor",
                          sample_data["calibrated_sensor_token"])
        q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1],
                         sensor["rotation"][2], sensor["rotation"][3])
        roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x),
                          1.0 - 2.0 * (q.x * q.x + q.y * q.y))
        pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x))
        yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y),
                         -1.0 + 2.0 * (q.w * q.w + q.x * q.x))

        entry = Entry(img=img_bytes,
                      content_type=content_type,
                      org_source="nuimages",
                      org_id=sample_data["filename"],
                      objects=[],
                      ignore=[],
                      has_3D_info=False,
                      has_track_info=False,
                      sensor_valid=True,
                      yaw=wrap_angle(yaw),
                      roll=wrap_angle(roll),
                      pitch=wrap_angle(pitch),
                      translation=sensor["translation"])

        # Create objects
        obj_tokens, surface_tokens = nuim.list_anns(sample['token'],
                                                    verbose=False)
        for obj_token in obj_tokens:
            obj = nuim.get("object_ann", obj_token)
            obj_cat = nuim.get("category", obj["category_token"])
            nu_class_name = obj_cat["name"]
            if nu_class_name not in IGNORE_CLASSES and nu_class_name in CLASS_MAP:
                obj_class = CLASS_MAP[nu_class_name]
                if obj_class not in list(OD_CLASS_MAPPING.keys()):
                    print("WARNING: Unkown class " + obj_class)
                    continue
                # TODO: Let's use attributes somehow e.g. car.moving
                # for attrib_token in obj["attribute_tokens"]:
                #     obj_attrib = nuim.get("attribute", attrib_token)

                # x, y, width, height
                bbox = np.array([(obj["bbox"][0] + roi.offset_left),
                                 (obj["bbox"][1] + roi.offset_top),
                                 obj["bbox"][2] - obj["bbox"][0],
                                 obj["bbox"][3] - obj["bbox"][1]],
                                dtype=np.float32)
                bbox *= roi.scale
                bbox = bbox.astype(np.int32)

                cv2.rectangle(img, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 1)
                entry.objects.append(
                    Object(
                        obj_class=obj_class,
                        box2d=bbox.tolist(),
                        box3d=None,
                        box3d_valid=False,
                        truncated=None,
                        occluded=None,
                    ))

        # f, (ax1) = plt.subplots(1, 1)
        # ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # plt.show()

        # upload to mongodb
        collection.insert_one(entry.get_dict())
Example #12
    def process(self, raw_data, input_data, ground_truth, piped_params=None):
        # start_time = time.time()
        # Add input_data
        img_encoded = np.frombuffer(raw_data["img"], np.uint8)
        input_data = cv2.imdecode(img_encoded, cv2.IMREAD_COLOR)
        input_data, roi_img = resize_img(
            input_data,
            self.params.INPUT_WIDTH,
            self.params.INPUT_HEIGHT,
            offset_bottom=self.params.OFFSET_BOTTOM)
        piped_params["roi_img"] = roi_img

        # Add ground_truth mask
        mask_encoded = np.frombuffer(raw_data["mask"], np.uint8)
        mask_img = cv2.imdecode(mask_encoded, cv2.IMREAD_COLOR)
        mask_img, _ = resize_img(mask_img,
                                 self.params.INPUT_WIDTH,
                                 self.params.INPUT_HEIGHT,
                                 offset_bottom=self.params.OFFSET_BOTTOM,
                                 interpolation=cv2.INTER_NEAREST)

        # augment and resize mask to real size
        if "replay_img_aug" in piped_params and len(
                piped_params["replay_img_aug"]) > 0:
            for replay in piped_params["replay_img_aug"]:
                transformed = A.ReplayCompose.replay(replay,
                                                     image=input_data,
                                                     mask=mask_img)
                input_data = transformed["image"]
                mask_img = transformed["mask"]
        elif self.start_augment is not None and piped_params[
                "epoch"] >= self.start_augment[0]:
            do_affine_augmentation = piped_params[
                "epoch"] >= self.start_augment[1]
            input_data, mask_img = self.augment(input_data, mask_img,
                                                do_affine_augmentation)
        mask_img, _ = resize_img(mask_img,
                                 self.params.MASK_WIDTH,
                                 self.params.MASK_HEIGHT,
                                 offset_bottom=0,
                                 interpolation=cv2.INTER_NEAREST)

        # one hot encode based on class mapping from semseg spec
        mask_img = to_hex(mask_img)  # convert 3-channel representation to a single hex channel
        colours = List()
        for _, colour in list(SEMSEG_CLASS_MAPPING.items()):
            hex_colour = (colour[0] << 16) + (colour[1] << 8) + colour[2]
            colours.append(hex_colour)
        pos_mask = np.ones((self.params.MASK_HEIGHT, self.params.MASK_WIDTH),
                           dtype=np.float32)
        mask_img, pos_mask = hex_to_one_hot(mask_img, pos_mask, colours)
        nb_classes = len(SEMSEG_CLASS_MAPPING)
        y_true_mask = to_categorical(mask_img, nb_classes)

        input_data = input_data.astype(np.float32)
        ground_truth = np.concatenate(
            (y_true_mask, np.expand_dims(pos_mask, axis=-1)), axis=-1)
        # elapsed_time = time.time() - start_time
        # print(str(elapsed_time) + " s")
        return raw_data, input_data, ground_truth, piped_params
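to_hex and hex_to_one_hot are project helpers that the snippet does not define; a minimal sketch of the packing step, assuming the channel order of mask_img already matches the (colour[0] << 16) + (colour[1] << 8) + colour[2] packing used above:

import numpy as np

def to_hex(mask_img):
    # Pack the three colour channels of each pixel into a single integer value.
    mask_img = mask_img.astype(np.uint32)
    return (mask_img[..., 0] << 16) + (mask_img[..., 1] << 8) + mask_img[..., 2]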