def process(self, raw_data, input_data, ground_truth, piped_params=None):
    # Add input_data
    img_t0 = cv2.imdecode(np.frombuffer(raw_data["img"], np.uint8), cv2.IMREAD_COLOR)
    img_t0, _ = resize_img(img_t0, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                           offset_bottom=self.params.OFFSET_BOTTOM)

    # Add ground_truth mask
    mask_t1 = cv2.imdecode(np.frombuffer(raw_data["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
    mask_t1, _ = resize_img(mask_t1, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                            offset_bottom=self.params.OFFSET_BOTTOM,
                            interpolation=cv2.INTER_NEAREST)

    # augment
    if piped_params and len(piped_params.get("replay_img_aug", [])) > 0:
        # replay the exact augmentations recorded for a previous frame
        for replay in piped_params["replay_img_aug"]:
            transformed = A.ReplayCompose.replay(replay, image=img_t0, mask=mask_t1)
            img_t0 = transformed["image"]
            mask_t1 = transformed["mask"]
    elif self.do_augmentation:
        img_t0, mask_t1 = self.augment(img_t0, mask_t1)
    img_t0 = img_t0.astype(np.float32)

    mask_t1, _ = resize_img(mask_t1, self.params.MASK_WIDTH, self.params.MASK_HEIGHT,
                            offset_bottom=self.params.OFFSET_BOTTOM,
                            interpolation=cv2.INTER_NEAREST)

    # adjust mask values: zero pixels are invalid; valid depth is clipped to
    # [4.1, 130.0] m and compressed with 22 * sqrt(d - 4)
    mask_t1 = mask_t1.astype(np.float32)
    mask_t1 /= 255.0
    pos_mask = np.where(mask_t1 > 0.0, 1.0, 0.0)
    mask_t1 = np.clip(mask_t1, 4.1, 130.0)
    mask_t1 = 22 * np.sqrt(mask_t1 - 4)
    mask_t1 *= pos_mask

    input_data = img_t0
    ground_truth = mask_t1
    return raw_data, input_data, ground_truth, piped_params
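
# Note on the label encoding above: depth is clipped to [4.1, 130.0] m and
# compressed with 22 * sqrt(d - 4), which spends more label resolution on near
# ranges. Below is a minimal sketch of the inverse mapping to recover metres
# from a prediction; `decode_depth_label` is a hypothetical helper, not part of
# this repo:
import numpy as np

def decode_depth_label(pred):
    """Invert 22 * sqrt(d - 4): d = (pred / 22)^2 + 4; zeroed (invalid) pixels stay 0."""
    depth_m = np.square(pred / 22.0) + 4.0
    return np.where(pred > 0.0, depth_m, 0.0)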
def process(self, raw_data, input_data, ground_truth, piped_params=None):
    # Add input_data: two consecutive frames
    img_t0 = cv2.imdecode(np.frombuffer(raw_data[0]["img"], np.uint8), cv2.IMREAD_COLOR)
    img_t0, _ = resize_img(img_t0, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                           offset_bottom=self.params.OFFSET_BOTTOM)
    img_t1 = cv2.imdecode(np.frombuffer(raw_data[1]["img"], np.uint8), cv2.IMREAD_COLOR)
    img_t1, _ = resize_img(img_t1, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                           offset_bottom=self.params.OFFSET_BOTTOM)
    # img_t0, img_t1 = self.augment(img_t0, img_t1)
    img_t0 = img_t0.astype(np.float32)
    img_t1 = img_t1.astype(np.float32)

    # Add ground_truth masks
    mask_t0 = cv2.imdecode(np.frombuffer(raw_data[0]["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
    mask_t0, _ = resize_img(mask_t0, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                            offset_bottom=self.params.OFFSET_BOTTOM,
                            interpolation=cv2.INTER_NEAREST)
    mask_t0 = mask_t0.astype(np.float32)
    mask_t0 /= 255.0
    mask_t0 = np.expand_dims(mask_t0, axis=-1)

    mask_t1 = cv2.imdecode(np.frombuffer(raw_data[1]["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
    mask_t1, _ = resize_img(mask_t1, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                            offset_bottom=self.params.OFFSET_BOTTOM,
                            interpolation=cv2.INTER_NEAREST)
    mask_t1 = mask_t1.astype(np.float32)
    mask_t1 /= 255.0
    mask_t1 = np.expand_dims(mask_t1, axis=-1)

    # currently hardcoded for the DrivingStereo dataset
    intr_scale = self.params.INPUT_WIDTH / 1758.0
    intr = np.array(
        [[2063.0 * intr_scale, 0.0, 978.0 * intr_scale],
         [0.0, 2063.0 * intr_scale, 584.0 * intr_scale],
         [0.0, 0.0, 1.0]], dtype=np.float32)

    input_data = [img_t0, img_t1, intr]
    ground_truth = [mask_t0, mask_t1]
    return raw_data, input_data, ground_truth, piped_params
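
# The intrinsics above assume DrivingStereo's calibration (fx = fy = 2063,
# cx = 978, cy = 584 at 1758 px image width) scaled uniformly to the network
# input width. A small sketch of the general rule, assuming the aspect ratio is
# preserved so a single scale factor applies to both axes; `scale_intrinsics`
# is illustrative only:
import numpy as np

def scale_intrinsics(fx, fy, cx, cy, scale):
    # Focal lengths and principal point scale linearly with image resolution
    return np.array([
        [fx * scale, 0.0, cx * scale],
        [0.0, fy * scale, cy * scale],
        [0.0, 0.0, 1.0]], dtype=np.float32)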
def test_resize_img(self, org_width, org_height, goal_width, goal_height, offset_bottom):
    img = np.zeros((org_height, org_width))
    resized_img, roi = resize_img(img, goal_width, goal_height, offset_bottom)
    assert resized_img.shape[0] == goal_height
    assert resized_img.shape[1] == goal_width
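
# For reference, a hedged sketch of driving such a test with pytest's
# parametrize decorator; the size combinations below are illustrative, not
# taken from the repo:
import numpy as np
import pytest

@pytest.mark.parametrize(
    "org_width, org_height, goal_width, goal_height, offset_bottom",
    [(1280, 720, 640, 256, 0), (1920, 1080, 320, 128, 60)],
)
def test_resize_img_shapes(org_width, org_height, goal_width, goal_height, offset_bottom):
    img = np.zeros((org_height, org_width))
    resized_img, _ = resize_img(img, goal_width, goal_height, offset_bottom)
    assert resized_img.shape[:2] == (goal_height, goal_width)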
def create_dataset(input_shape):
    print("Resize Input to: " + str(input_shape))
    dataset = []
    # Create sample dataset for post-training quantization
    client = MongoClient("mongodb://localhost:27017")
    collection = client["labels"]["nuscenes_train"]
    documents = collection.find({}).limit(600).skip(50)
    documents_list = list(documents)
    assert len(documents_list) > 0
    for doc in documents_list:
        decoded_img_t0 = np.frombuffer(doc["img"], np.uint8)
        img_t0 = cv2.imdecode(decoded_img_t0, cv2.IMREAD_COLOR)
        img_t0, _ = resize_img(img_t0, input_shape[2], input_shape[1], offset_bottom=0)
        dataset.append(np.array([img_t0], dtype=np.float32))
    return dataset
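
# A typical consumer of this sample dataset is the TFLite converter, which uses
# it to calibrate activation ranges during post-training quantization. A
# minimal sketch, assuming `model` is the trained Keras model loaded elsewhere:
import tensorflow as tf

def quantize(model, input_shape):
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # the converter iterates this generator during calibration
    converter.representative_dataset = lambda: ([sample] for sample in create_dataset(input_shape))
    return converter.convert()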
# cap = cv2.VideoCapture('/path/to/video.mp4')
# cap.set(cv2.CAP_PROP_POS_MSEC, 0)
# while (cap.isOpened()):
#     ret, img = cap.read()
documents = collection.find({})
for doc in documents:
    decoded_img = np.frombuffer(doc["img"], np.uint8)
    img = cv2.imdecode(decoded_img, cv2.IMREAD_COLOR)
    gt_mask = None
    if doc["mask"] is not None:
        decoded_mask = np.frombuffer(doc["mask"], np.uint8)
        gt_mask = cv2.imdecode(decoded_mask, cv2.IMREAD_COLOR)
    img, roi = resize_img(img, args.img_width, args.img_height, args.offset_bottom)
    if is_tf_lite:
        img_input = img
        interpreter.set_tensor(input_details[0]['index'], [img_input])
        # input_shape = input_details[0]['shape']
        start_time = time.time()
        interpreter.invoke()
        elapsed_time = time.time() - start_time
        # get_tensor() returns a copy of the tensor data; use tensor() to get a pointer to the tensor
        output_data = interpreter.get_tensor(output_details[0]['index'])
        semseg_img = to_3channel(output_data[0].astype(np.float64), List(SEMSEG_CLASS_MAPPING.items()))
    else:
        img_arr = np.array([img])
        start_time = time.time()
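
# `to_3channel` above maps per-pixel class scores back to the colour coding in
# SEMSEG_CLASS_MAPPING. For intuition, a rough numpy equivalent, assuming the
# mapping is an ordered dict of class name -> (r, g, b); illustrative only, not
# the repo's implementation:
import numpy as np

def argmax_to_colour(scores, class_colours):
    # scores: (H, W, nb_classes) -> colour image (H, W, 3)
    lut = np.array(class_colours, dtype=np.uint8)
    return lut[np.argmax(scores, axis=-1)]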
def main(args):
    args.path = "/home/jo/training_data/nuscenes/nuscenes-v1.0"
    args.resize = [640, 256, 0]
    client = MongoClient(args.conn)
    collection = client[args.db][args.collection]

    nusc = NuScenes(version="v1.0-mini", dataroot=args.path, verbose=True)
    nusc.list_scenes()
    for scene in tqdm(nusc.scene):
        next_sample_token = scene["first_sample_token"]
        while True:
            sample = nusc.get('sample', next_sample_token)
            next_sample_token = sample["next"]
            sample_data_token = sample["data"]["CAM_FRONT"]
            sample_data = nusc.get_sample_data(sample_data_token)
            cam_front_data = nusc.get('sample_data', sample_data_token)

            # Create image data
            img_path = sample_data[0]
            if os.path.exists(img_path):
                img = cv2.imread(img_path)
                depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='LIDAR_TOP', camera_channel='CAM_FRONT')
                # Not sure about radar data, it often does not align with the lidar data; leaving it out for now
                # depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='RADAR_FRONT', camera_channel='CAM_FRONT', depth_map=depth_map)
                roi = Roi()
                if args.resize:
                    img, roi = resize_img(img, args.resize[0], args.resize[1], args.resize[2])
                    depth_map, _ = resize_img(depth_map, args.resize[0], args.resize[1], args.resize[2], cv2.INTER_NEAREST)
                img_bytes = cv2.imencode('.jpeg', img)[1].tobytes()
                depth_bytes = cv2.imencode(".png", depth_map)[1].tobytes()
                content_type = "image/jpeg"
            else:
                print("WARNING: file not found: " + img_path + ", continue with next image")
                continue

            # Get sensor extrinsics. Not sure why roll and yaw seem to be PI/2 off compared to the nuImages calibrated sensor
            sensor = nusc.get('calibrated_sensor', cam_front_data['calibrated_sensor_token'])
            q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1], sensor["rotation"][2], sensor["rotation"][3])
            roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x), 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x))
            yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y), -1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(sensor["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")
            # Sensor calibration is static, pose would be dynamic.
            # TODO: Somehow also add some sort of cam-to-cam motion to be learned
            # ego_pose = nusc.get("ego_pose", cam_front_data["ego_pose_token"])
            # q = MyQuaternion(ego_pose["rotation"][0], ego_pose["rotation"][1], ego_pose["rotation"][2], ego_pose["rotation"][3])
            # roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x), 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            # pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x))
            # yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y), -1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(ego_pose["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")

            entry = Entry(
                img=img_bytes,
                content_type=content_type,
                depth=depth_bytes,
                org_source="nuscenes",
                org_id=img_path,
                objects=[],
                ignore=[],
                has_3D_info=True,
                has_track_info=True,
                sensor_valid=True,
                yaw=wrap_angle(yaw),
                roll=wrap_angle(roll),
                pitch=wrap_angle(pitch),
                translation=sensor["translation"],
                scene_token=sample["scene_token"],
                timestamp=sample["timestamp"]
            )

            labels = sample_data[1]
            idx_counter = 0
            for box in labels:
                if box.name in CLASS_MAP.keys():
                    box.translate(np.array([0, box.wlh[2] / 2, 0]))  # translate center to bottom center
                    pos_3d = np.array([*box.center, 1.0])
                    cam_mat = np.hstack((sample_data[2], np.zeros((3, 1))))
                    cam_mat[0][2] += roi.offset_left
                    cam_mat[1][2] += roi.offset_top
                    cam_mat *= roi.scale
                    cam_mat[2][2] = 1.0
                    # Create rotation matrix. Somehow, using the rotation matrix directly from nuscenes does not work;
                    # instead, we calculate the rotation as it is done in kitti and reuse the same code
                    v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                    yaw = -np.arctan2(v[2], v[0])
                    rot_angle = wrap_angle(float(yaw) + math.pi * 0.5)  # because parallel to optical view of camera = 90 deg
                    rot_mat = np.array([
                        [math.cos(rot_angle), 0.0, math.sin(rot_angle), 0.0],
                        [0.0, 1.0, 0.0, 0.0],
                        [-math.sin(rot_angle), 0.0, math.cos(rot_angle), 0.0],
                        [0.0, 0.0, 0.0, 1.0],
                    ])
                    pos_2d = np.matmul(cam_mat, pos_3d)
                    pos_2d /= pos_2d[2]
                    box3d = calc_cuboid_from_3d(pos_3d, cam_mat, rot_mat, box.wlh[0], box.wlh[2], box.wlh[1])
                    box2d = bbox_from_cuboid(box3d)
                    annotation = nusc.get("sample_annotation", box.token)
                    instance_token = annotation["instance_token"]
                    entry.objects.append(Object(
                        obj_class=CLASS_MAP[box.name],
                        box2d=box2d,
                        box3d=box3d,
                        box3d_valid=True,
                        instance_token=instance_token,
                        truncated=None,
                        occluded=None,
                        width=box.wlh[0],
                        length=box.wlh[1],
                        height=box.wlh[2],
                        orientation=rot_angle,
                        # converted to autosar coordinate system
                        x=pos_3d[2],
                        y=-pos_3d[0],
                        z=-pos_3d[1]
                    ))

                    # For debugging, show the data in the image
                    box2d = list(map(int, box2d))
                    cv2.circle(img, (int(pos_2d[0]), int(pos_2d[1])), 3, (255, 0, 0))
                    cv2.rectangle(img, (box2d[0], box2d[1]), (box2d[0] + box2d[2], box2d[1] + box2d[3]), (255, 255, 0), 1)
                    cv2.putText(img, f"{idx_counter}", (box2d[0], box2d[1] + int(box2d[3])), 0, 0.4, (255, 255, 255))
                    idx_counter += 1
                    print(f"{idx_counter}: {math.degrees(rot_angle)}")

            f, (ax1, ax2) = plt.subplots(2, 1)
            ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            max_val = np.amax(depth_map)
            ax2.imshow(depth_map, cmap="gray", vmin=1, vmax=10000)
            plt.show()

            # collection.insert_one(entry.get_dict())

            if next_sample_token == "":
                break
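
# The extrinsics handling above repeats the same quaternion-to-Euler conversion
# for the calibrated sensor and (commented out) the ego pose. A self-contained
# version of the formulas used, without the nuScenes-specific PI/2 offsets;
# `quat_to_euler` is illustrative only:
import math

def quat_to_euler(w, x, y, z):
    """Roll/pitch/yaw from a unit quaternion, matching the formulas above."""
    roll = math.atan2(2.0 * (z * y + w * x), 1.0 - 2.0 * (x * x + y * y))
    pitch = math.asin(2.0 * (y * w - z * x))
    yaw = math.atan2(2.0 * (z * w + x * y), -1.0 + 2.0 * (w * w + x * x))
    return roll, pitch, yaw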
curr_scene_depth_map_root = os.path.join(args.depth_map, folder)
curr_scene_image_root = os.path.join(args.images, folder)
_, _, filenames = next(os.walk(curr_scene_depth_map_root))
filenames.sort()  # since we have video frames
timestamp = 0
next_timestamp = 1
for filename in tqdm(filenames):
    depth_file = os.path.join(curr_scene_depth_map_root, filename)
    img_file = os.path.join(curr_scene_image_root, filename[:-3] + "jpg")
    if os.path.isfile(img_file):
        img_data = cv2.imread(img_file)
        depth_data = cv2.imread(depth_file, -1)  # load as is (uint16 grayscale img)
        # depth_data_org = depth_data
        if args.resize is not None:
            depth_data, _ = resize_img(depth_data, args.resize[0], args.resize[1], args.resize[2], interpolation=cv2.INTER_NEAREST)
            img_data, _ = resize_img(img_data, args.resize[0], args.resize[1], args.resize[2])
        img_bytes = cv2.imencode(".jpg", img_data)[1].tobytes()
        depth_bytes = cv2.imencode(".png", depth_data)[1].tobytes()
        entry = Entry(
            img=img_bytes,
            depth=depth_bytes,
            content_type="image/jpg",
            org_source="driving_stereo",
            org_id=filename[:-3],
            scene_token=folder,
            timestamp=timestamp,
            next_timestamp=next_timestamp
        )
def test():
    params = DmdsParams()
    loss = DmdsLoss(params)
    batch_size = 2

    client = MongoClient("mongodb://localhost:27017")
    collection = client["depth"]["driving_stereo"]
    documents = collection.find({}).limit(10).skip(300)
    documents = list(documents)
    for i in range(0, len(documents) - 1):
        intr = np.array(
            [[375.0, 0.0, 160.0],
             [0.0, 375.0, 128.0],
             [0.0, 0.0, 1.0]], dtype=np.float32)
        intr = np.stack([intr] * batch_size)

        img0 = cv2.imdecode(np.frombuffer(documents[i]["img"], np.uint8), cv2.IMREAD_COLOR)
        img0, _ = resize_img(img0, 320, 128, 0)
        img0 = np.stack([img0] * batch_size, axis=0)
        img0 = img0.astype(np.float32)
        img1 = cv2.imdecode(np.frombuffer(documents[i + 1]["img"], np.uint8), cv2.IMREAD_COLOR)
        img1, _ = resize_img(img1, 320, 128, 0)
        img1 = np.stack([img1] * batch_size, axis=0)
        img1 = img1.astype(np.float32)

        # create gt depth_maps
        x0 = cv2.imdecode(np.frombuffer(documents[i]["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
        x0, _ = resize_img(x0, 320, 128, 0, interpolation=cv2.INTER_NEAREST)
        x0 = np.expand_dims(x0, axis=-1)
        x0 = np.stack([x0] * batch_size, axis=0)
        x0 = x0.astype(np.float32)
        x0 /= 255.0
        x1 = cv2.imdecode(np.frombuffer(documents[i + 1]["depth"], np.uint8), cv2.IMREAD_ANYDEPTH)
        x1, _ = resize_img(x1, 320, 128, 0, interpolation=cv2.INTER_NEAREST)
        x1 = np.expand_dims(x1, axis=-1)
        x1 = np.stack([x1] * batch_size, axis=0)
        x1 = x1.astype(np.float32)
        x1 /= 255.0

        mm = np.zeros((*img0.shape[:-1], 3), dtype=np.float32)
        mm[:, 23:85, 132:320, :] = [0.0, 0.0, 0.0]
        mm_inv = np.zeros((*img0.shape[:-1], 3), dtype=np.float32)
        mm_inv[:, 23:85, 172:320, :] = [0.0, 0.0, 0.0]

        rot = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        rot[:, 0, 0, :] = np.array([-0.1, 0.0, 0.1])
        rot_inv = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        rot_inv[:, 0, 0, :] = np.array([0.1, 0.0, -0.1])
        tran = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        tran[:, 0, 0, :] = np.array([0.0, 0.0, 10.0])  # [left,right | up,down | forward,backward]
        tran_inv = np.zeros((batch_size, 1, 1, 3), dtype=np.float32)
        tran_inv[:, 0, 0, :] = np.array([0.0, 0.0, -10.0])

        loss.calc(img1, img0, x1, x0, mm_inv, mm, tran_inv, tran, rot_inv, rot, intr, x0, x1, 0)

        for key in loss.loss_vals.keys():
            print(f"{key}: {loss.loss_vals[key].numpy()}")
        print(" - - - - - - - -")

        for b in range(len(img0)):
            f, ((ax11, ax12), (ax21, ax22), (ax31, ax32)) = plt.subplots(3, 2)
            # img0, img1
            ax11.imshow(cv2.cvtColor(img0[b].astype(np.uint8), cv2.COLOR_BGR2RGB))
            ax12.imshow(cv2.cvtColor(img1[b].astype(np.uint8), cv2.COLOR_BGR2RGB))
            # x1, mm
            ax21.imshow(x1[b], cmap='gray', vmin=0, vmax=170)
            ax22.imshow((mm[b] * (255.0 / np.amax(mm[b]))).astype(np.uint8))
            # warp mask, resampled frame
            ax31.imshow(loss.warp_mask[b], cmap='gray', vmin=0, vmax=1)
            ax32.imshow(cv2.cvtColor(loss.resampled_img1[b].numpy().astype(np.uint8), cv2.COLOR_BGR2RGB))
            plt.show()
model: tf.keras.models.Model = tf.keras.models.load_model(args.model_path, compile=False)
model.summary()
print("Using Tensorflow")

# alternative data source, mp4 video
# cap = cv2.VideoCapture('/home/jo/training_data/speedchallenge/data/train.mp4')
# while (cap.isOpened()):
#     ret, img = cap.read()
documents = collection.find({}).limit(20)
for doc in documents:
    decoded_img = np.frombuffer(doc["img"], np.uint8)
    img = cv2.imdecode(decoded_img, cv2.IMREAD_COLOR)
    input_img, roi = resize_img(img, input_shape[2], input_shape[1], offset_bottom=args.offset_bottom)
    if is_tf_lite:
        input_img = input_img.astype(np.float32)
        interpreter.set_tensor(input_details[0]['index'], [input_img])
        start_time = time.time()
        interpreter.invoke()
        elapsed_time = time.time() - start_time
        # get_tensor() returns a copy of the tensor data; use tensor() to get a pointer to the tensor
        output_data = interpreter.get_tensor(output_details[0]['index'])
        output_mask = output_data[0]
    else:
        start_time = time.time()
        raw_result = model.predict(np.array([input_img]))
        elapsed_time = time.time() - start_time
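
# For completeness, a hedged sketch of the TFLite setup this branch assumes
# (the `interpreter`, `input_details`, and `output_details` names used above);
# the model path is a placeholder:
import tensorflow as tf

def load_tflite(model_path):
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter, interpreter.get_input_details(), interpreter.get_output_details()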
    args.model_path, custom_objects, compile=False)
model.summary()
print("Using Tensorflow")

# alternative data source, mp4 video
# cap = cv2.VideoCapture('/path/to/video.mp4')
# cap.set(cv2.CAP_PROP_POS_MSEC, 0)
# while (cap.isOpened()):
#     ret, img = cap.read()
documents = collection.find({}).limit(1000)
documents = list(documents)
for i in range(0, len(documents) - 1):
    decoded_img_t0 = np.frombuffer(documents[i]["img"], np.uint8)
    img_t0 = cv2.imdecode(decoded_img_t0, cv2.IMREAD_COLOR)
    img_t0, _ = resize_img(img_t0, args.img_width, args.img_height, args.offset_bottom)
    decoded_img_t1 = np.frombuffer(documents[i + 1]["img"], np.uint8)
    img_t1 = cv2.imdecode(decoded_img_t1, cv2.IMREAD_COLOR)
    img_t1, _ = resize_img(img_t1, args.img_width, args.img_height, args.offset_bottom)
    intr = np.array(
        [[375.0, 0.0, 160.0],
         [0.0, 375.0, 128.0],
         [0.0, 0.0, 1.0]], dtype=np.float32)
    if is_tf_lite:
        interpreter.set_tensor(input_details[0]['index'], [img_t0])
        interpreter.set_tensor(input_details[1]['index'], [img_t1])
        # input_shape = input_details[0]['shape']
        start_time = time.time()
def main(args):
    args.path = "/home/jo/training_data/nuscenes/nuimages-v1.0"
    args.resize = [640, 256, 0]
    client = MongoClient(args.conn)
    collection = client[args.db][args.collection]

    nuim = NuImages(version="v1.0-val", dataroot=args.path, lazy=True, verbose=False)
    for sample in tqdm(nuim.sample):
        sample_data = nuim.get("sample_data", sample["key_camera_token"])

        # check if already exists, if yes, continue with next
        # check_db_entry = collection.find_one({"org_source": "nuscenes", "org_id": sample_data["filename"]})
        # if check_db_entry is not None:
        #     print("WARNING: Entry " + str(sample_data["filename"]) + " already exists, continue with next image")
        #     continue

        # Create image data
        img_path = args.path + "/" + sample_data["filename"]
        roi = Roi()
        if os.path.exists(img_path):
            if "CAM_FRONT/" in img_path:
                img = cv2.imread(img_path)
                if args.resize is not None:
                    img, roi = resize_img(img, args.resize[0], args.resize[1], args.resize[2])
                img_bytes = cv2.imencode('.jpeg', img)[1].tobytes()
                content_type = "image/jpeg"
            else:
                continue
        else:
            print("WARNING: file not found: " + img_path + ", continue with next image")
            continue

        # Get sensor extrinsics
        sensor = nuim.get("calibrated_sensor", sample_data["calibrated_sensor_token"])
        q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1], sensor["rotation"][2], sensor["rotation"][3])
        roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x), 1.0 - 2.0 * (q.x * q.x + q.y * q.y))
        pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x))
        yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y), -1.0 + 2.0 * (q.w * q.w + q.x * q.x))

        entry = Entry(
            img=img_bytes,
            content_type=content_type,
            org_source="nuimages",
            org_id=sample_data["filename"],
            objects=[],
            ignore=[],
            has_3D_info=False,
            has_track_info=False,
            sensor_valid=True,
            yaw=wrap_angle(yaw),
            roll=wrap_angle(roll),
            pitch=wrap_angle(pitch),
            translation=sensor["translation"]
        )

        # Create objects
        obj_tokens, surface_tokens = nuim.list_anns(sample['token'], verbose=False)
        for obj_token in obj_tokens:
            obj = nuim.get("object_ann", obj_token)
            obj_cat = nuim.get("category", obj["category_token"])
            nu_class_name = obj_cat["name"]
            if nu_class_name not in IGNORE_CLASSES and nu_class_name in CLASS_MAP:
                obj_class = CLASS_MAP[nu_class_name]
                if obj_class not in list(OD_CLASS_MAPPING.keys()):
                    print("WARNING: Unknown class " + obj_class)
                    continue
                # TODO: Let's use attributes somehow, e.g. car.moving
                # for attrib_token in obj["attribute_tokens"]:
                #     obj_attrib = nuim.get("attribute", attrib_token)

                # x, y, width, height
                bbox = np.array([
                    obj["bbox"][0] + roi.offset_left,
                    obj["bbox"][1] + roi.offset_top,
                    obj["bbox"][2] - obj["bbox"][0],
                    obj["bbox"][3] - obj["bbox"][1]], dtype=np.float32)
                bbox *= roi.scale
                bbox = bbox.astype(np.int32)
                cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 1)
                entry.objects.append(Object(
                    obj_class=obj_class,
                    box2d=bbox.tolist(),
                    box3d=None,
                    box3d_valid=False,
                    truncated=None,
                    occluded=None,
                ))

        # f, (ax1) = plt.subplots(1, 1)
        # ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        # plt.show()

        # upload to mongodb
        collection.insert_one(entry.get_dict())
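
# Worked example for the bbox adjustment above, with made-up numbers (the real
# offsets and scale come from resize_img's Roi): take roi.scale = 0.4 and
# roi.offset_top = -130, i.e. 130 px cropped from the top in source
# coordinates. A source box (x1=100, y1=400, x2=300, y2=500) then becomes:
#   x = (100 + 0) * 0.4      = 40
#   y = (400 + (-130)) * 0.4 = 108
#   w = (300 - 100) * 0.4    = 80
#   h = (500 - 400) * 0.4    = 40
# Offsets shift only the top-left corner; width and height are only scaled.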
def process(self, raw_data, input_data, ground_truth, piped_params=None):
    # start_time = time.time()
    # Add input_data
    img_encoded = np.frombuffer(raw_data["img"], np.uint8)
    input_data = cv2.imdecode(img_encoded, cv2.IMREAD_COLOR)
    input_data, roi_img = resize_img(input_data, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                                     offset_bottom=self.params.OFFSET_BOTTOM)
    piped_params["roi_img"] = roi_img

    # Add ground_truth mask
    mask_encoded = np.frombuffer(raw_data["mask"], np.uint8)
    mask_img = cv2.imdecode(mask_encoded, cv2.IMREAD_COLOR)
    mask_img, _ = resize_img(mask_img, self.params.INPUT_WIDTH, self.params.INPUT_HEIGHT,
                             offset_bottom=self.params.OFFSET_BOTTOM,
                             interpolation=cv2.INTER_NEAREST)

    # augment and resize mask to real size
    if "replay_img_aug" in piped_params and len(piped_params["replay_img_aug"]) > 0:
        for replay in piped_params["replay_img_aug"]:
            transformed = A.ReplayCompose.replay(replay, image=input_data, mask=mask_img)
            input_data = transformed["image"]
            mask_img = transformed["mask"]
    elif self.start_augment is not None and piped_params["epoch"] >= self.start_augment[0]:
        do_affine_augmentation = piped_params["epoch"] >= self.start_augment[1]
        input_data, mask_img = self.augment(input_data, mask_img, do_affine_augmentation)
    mask_img, _ = resize_img(mask_img, self.params.MASK_WIDTH, self.params.MASK_HEIGHT,
                             offset_bottom=0, interpolation=cv2.INTER_NEAREST)

    # one-hot encode based on class mapping from semseg spec
    mask_img = to_hex(mask_img)  # convert 3-channel representation to a single hex channel
    colours = List()
    for _, colour in list(SEMSEG_CLASS_MAPPING.items()):
        hex_colour = (colour[0] << 16) + (colour[1] << 8) + colour[2]
        colours.append(hex_colour)
    pos_mask = np.ones((self.params.MASK_HEIGHT, self.params.MASK_WIDTH), dtype=np.float32)
    mask_img, pos_mask = hex_to_one_hot(mask_img, pos_mask, colours)
    nb_classes = len(SEMSEG_CLASS_MAPPING)
    y_true_mask = to_categorical(mask_img, nb_classes)

    input_data = input_data.astype(np.float32)
    ground_truth = np.concatenate((y_true_mask, np.expand_dims(pos_mask, axis=-1)), axis=-1)

    # elapsed_time = time.time() - start_time
    # print(str(elapsed_time) + " s")
    return raw_data, input_data, ground_truth, piped_params
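
# `to_hex` and `hex_to_one_hot` above pack each colour pixel into a single
# integer so class lookup becomes one comparison per class. A rough numpy
# sketch of the idea, assuming 3-channel uint8 input in the same channel order
# as the class mapping; illustrative only, not the repo's implementation:
import numpy as np

def to_hex_np(mask_img):
    # (H, W, 3) uint8 colour mask -> (H, W) uint32 hex values
    m = mask_img.astype(np.uint32)
    return (m[..., 0] << 16) + (m[..., 1] << 8) + m[..., 2]

def hex_to_class_idx(hex_mask, colours):
    # map each hex colour to its class index; unknown colours fall back to 0
    class_idx = np.zeros(hex_mask.shape, dtype=np.int32)
    for idx, colour in enumerate(colours):
        class_idx[hex_mask == colour] = idx
    return class_idx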