Exemplo n.º 1
0
    def execute(self, min_resolution=1080):
        """
        Run inference on the single image at `self.input_file` and write the
        predicted label map to `self.output_path`.

        Args:
        -   min_resolution: threshold on the shorter image side, in pixels.
                When the input's shorter side is below this value, the RGB
                image and the predicted label map are both passed through
                resize_img_by_short_side() with this value as the target.

        Returns:
        -   None
        """
        logger.info('>>>>>>>>>>>>>>>> Start inference task >>>>>>>>>>>>>>>>')
        self.model.eval()
        logger.info(f'Write image prediction to {self.output_path}')

        input_rgb = imread_rgb(self.input_file)
        label_map = self.execute_on_img(input_rgb)

        # Overlaid class text is difficult to read below 1080p, so small
        # inputs are upsampled (RGB first, then the label map). The resized
        # RGB image is not used again within this method.
        needs_upsampling = min_resolution > np.amin(input_rgb.shape[:2])
        if needs_upsampling:
            input_rgb, label_map = (
                resize_img_by_short_side(input_rgb, min_resolution, 'rgb'),
                resize_img_by_short_side(label_map, min_resolution, 'label'),
            )

        imageio.imwrite(self.output_path, label_map)

        logger.info(
            '<<<<<<<<<<<<<<<<< Inference task completed <<<<<<<<<<<<<<<<<')
Exemplo n.º 2
0
def test_save_pred_vs_label_7tuple_short_side_60_img():
    """
    Exercise save_pred_vs_label_7tuple() on a very low-resolution input
    (short side of 60 px).

    When the image is too low in resolution, cv2's text rendering code
    cannot be used directly, so an upsampled version is expected to be
    saved as well. Both output files are removed at the end, which fails
    if either one was not written.
    """
    target_short_side = 60  # pixels

    camvid_dir = f'{TEST_DATA_ROOT}/Camvid_test_data'
    rgb = imageio.imread(f'{camvid_dir}/images/0016E5_08159.png')
    label = imageio.imread(f'{camvid_dir}/preds/0016E5_08159.png')

    # Shrink both inputs so that their shorter side is only 60 px.
    rgb = resize_img_by_short_side(rgb, target_short_side, img_type='rgb')
    label = resize_img_by_short_side(label, target_short_side,
                                     img_type='label')

    height, width = label.shape

    # Random class ids stand in for a real network prediction.
    random_pred = np.random.randint(0, 200, (height, width)).astype(np.uint16)
    class_names = get_dataloader_id_to_classname_map('pascal-context-460')

    out_fpath = f'{TEST_DATA_ROOT}/rand_549_temp_small.png'
    save_pred_vs_label_7tuple(rgb, random_pred, label, class_names, out_fpath)

    # Clean up: the upsampled rendering first, then the native-resolution one.
    expected_outputs = (
        f'{TEST_DATA_ROOT}/rand_549_temp_small_upsample_pred_labels_palette.png',
        f'{TEST_DATA_ROOT}/rand_549_temp_small_pred_labels_palette.png',
    )
    for written_fpath in expected_outputs:
        os.remove(written_fpath)
Exemplo n.º 3
0
    def render_single_img_pred(self, min_resolution: int = 1080):
        """
        Run inference on the image at `self.input_file` and write two files
        named after the input's stem (no directory component is added to the
        output paths):
        -   `<stem>_overlaid_classes.jpg`: the RGB image with the predicted
                classes overlaid by Visualizer.overlay_instances().
        -   `<stem>_gray.jpg`: the predicted label map itself.

        Since overlaid class text is difficult to read below 1080p, inputs
        whose shorter side is under `min_resolution` are upsampled, together
        with their predictions, before rendering.

        Args:
        -   min_resolution: threshold (and resize target) for the shorter
                image side, in pixels.
        """
        stem = Path(self.input_file).stem
        gray_out_fpath = f'{stem}_gray.jpg'
        demo_out_fpath = f'{stem}_overlaid_classes.jpg'
        logger.info(f'Write image prediction to {demo_out_fpath}')

        frame = imread_rgb(self.input_file)
        label_map = self.execute_on_img(frame)

        # Upsample the RGB image before text is drawn on it, so that the
        # rendered result is not blurry; the label map is resized to match.
        too_small = min_resolution > np.amin(frame.shape[:2])
        if too_small:
            frame, label_map = (
                resize_img_by_short_side(frame, min_resolution, 'rgb'),
                resize_img_by_short_side(label_map, min_resolution, 'label'),
            )

        # No metadata is supplied to the visualizer.
        overlay = Visualizer(frame, None).overlay_instances(
            label_map=label_map,
            id_to_class_name_map=self.id_to_class_name_map)

        imageio.imwrite(demo_out_fpath, overlay)
        imageio.imwrite(gray_out_fpath, label_map)
Exemplo n.º 4
0
def _tag_png_fname(fpath: str, tag: str) -> str:
    """Insert `tag` before the last '.png' of the file-name part of `fpath`.

    Directory components are never modified. If the file name contains no
    '.png', `fpath` is returned unchanged.
    """
    import os

    fname = os.path.basename(fpath)
    stem, png_ext, trailing = fname.rpartition('.png')
    if not png_ext:
        return fpath
    # Slice instead of re-joining so the directory prefix stays byte-identical.
    dir_prefix = fpath[:len(fpath) - len(fname)]
    return f'{dir_prefix}{stem}{tag}.png{trailing}'


def save_pred_vs_label_7tuple(img_rgb: np.ndarray,
                              pred_img: np.ndarray,
                              label_img: np.ndarray,
                              id_to_class_name_map: Mapping[int, str],
                              save_fpath: str) -> None:
    """ 7-tuple consists of
            (1-3) rgb mask 3-sequence for label,
            (4-6) rgb mask 3-sequence for predictions,
            (7) color palette

        The rendering is written next to `save_fpath`, with
        '_pred_labels_palette' inserted before the '.png' extension. When
        the shorter image side is below MIN_DISCERNABLE_RES_FOR_TEXT, an
        upsampled rendering is written as well, with
        '_upsample_pred_labels_palette' inserted instead.

        Args:
        -   img_rgb: array of shape (H, W, C), the RGB image.
        -   pred_img: array of shape (H, W), predicted class ids.
        -   label_img: array of shape (H, W), ground-truth class ids.
        -   id_to_class_name_map: class id -> class name, used for the
                color palette.
        -   save_fpath: base output path, expected to end in '.png'.

        Returns:
        -   None
    """
    img_h, img_w, _ = img_rgb.shape
    assert pred_img.shape == (img_h, img_w)
    assert label_img.shape == (img_h, img_w)

    if min(img_h, img_w) < MIN_DISCERNABLE_RES_FOR_TEXT:
        # Too small for legible text: additionally render an upsampled copy
        # under a separate file name.
        save_pred_vs_label_7tuple(
            img_rgb=resize_img_by_short_side(
                img_rgb.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='rgb'),
            pred_img=resize_img_by_short_side(
                pred_img.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='label'),
            label_img=resize_img_by_short_side(
                label_img.copy(),
                short_side_len=MIN_DISCERNABLE_RES_FOR_TEXT,
                img_type='label'),
            id_to_class_name_map=id_to_class_name_map,
            # Only the file name is tagged; a plain str.replace() would also
            # rewrite any '.png' occurring in a directory name.
            save_fpath=_tag_png_fname(save_fpath, '_upsample')
        )

    NUM_HSTACKED_IMGS = 3
    # Top row: ground-truth triple; bottom row: prediction triple.
    hstack_img1 = form_mask_triple(img_rgb, label_img, save_fpath, save_to_disk=False)
    hstack_img2 = form_mask_triple(img_rgb, pred_img, save_fpath, save_to_disk=False)

    vstack_img1 = np.vstack([hstack_img1, hstack_img2])

    # The palette lists only the classes present in either map, spread over
    # as many columns as there are horizontally stacked images.
    present_color_ids = np.union1d(np.unique(label_img), np.unique(pred_img))
    num_present_colors = len(present_color_ids)
    max_colors_per_col = int(math.ceil(num_present_colors / NUM_HSTACKED_IMGS))

    palette_img = form_contained_classes_color_guide(
        present_color_ids,
        id_to_class_name_map,
        '',
        '',
        save_to_disk=False,
        max_colors_per_col=max_colors_per_col
    )

    vstack_img2 = vstack_img_with_palette(vstack_img1, palette_img)

    save_fpath = _tag_png_fname(save_fpath, '_pred_labels_palette')
    # Reverse the channel axis before handing the image to OpenCV
    # (presumably RGB -> BGR, which cv2.imwrite expects).
    cv2.imwrite(save_fpath, vstack_img2[:, :, ::-1])
Exemplo n.º 5
0
    def execute_on_video(self,
                         max_num_frames: int = 5000,
                         min_resolution: int = 1080) -> None:
        """
        Read frames from the RGB video at `self.input_file`, run inference
        on each frame, and write the frames with overlaid predictions into
        a new .mp4 file under `{_ROOT}/temp_files/`.

        The output file name is built from the input file's stem, the model
        name, the scales string and the base size.

            Args:
            -   max_num_frames: upper bound on the number of frames that
                    are read and processed.
            -   min_resolution: threshold (and resize target) for the
                    shorter frame side, in pixels. Smaller frames are
                    upsampled, together with their predictions, before
                    the overlay is drawn.

            Returns:
            -   None
        """
        in_fname_stem = Path(self.input_file).stem
        out_fname = f'{in_fname_stem}_{self.args.model_name}_universal'
        out_fname += f'_scales_{self.scales_str}_base_sz_{self.args.base_size}.mp4'

        output_video_fpath = f'{_ROOT}/temp_files/{out_fname}'
        create_leading_fpath_dirs(output_video_fpath)
        logger.info(f'Write video to {output_video_fpath}')
        writer = VideoWriter(output_video_fpath)

        # try/finally guarantees that reader and writer are finalized even
        # if inference fails partway through the video.
        try:
            reader = VideoReader(self.input_file)
            try:
                num_frames = reader.num_frames
                # Cap the loop up front so that exactly `max_num_frames`
                # frames at most are read, and no frame is read and then
                # discarded.
                for frame_idx in range(min(num_frames, max_num_frames)):
                    logger.info(f'On image {frame_idx}/{num_frames}')
                    rgb_img = reader.get_frame()
                    pred_label_img = self.execute_on_img(rgb_img)

                    # avoid blurry images by upsampling RGB before
                    # overlaying text
                    if np.amin(rgb_img.shape[:2]) < min_resolution:
                        rgb_img = resize_img_by_short_side(
                            rgb_img, min_resolution, 'rgb')
                        pred_label_img = resize_img_by_short_side(
                            pred_label_img, min_resolution, 'label')

                    metadata = None
                    frame_visualizer = Visualizer(rgb_img, metadata)
                    output_img = frame_visualizer.overlay_instances(
                        label_map=pred_label_img,
                        id_to_class_name_map=self.id_to_class_name_map)
                    writer.add_frame(output_img)
            finally:
                reader.complete()
        finally:
            writer.complete()
Exemplo n.º 6
0
def test_resize_img_by_short_side_label2():
    """
    Downsample a 200x100 (height x width) label map with random content so
    that its short side is 10 px; the result should be 20x10 and still uint8.
    """
    random_label = np.random.randint(0, 255, size=(200, 100)).astype(np.uint8)

    resized = resize_img_by_short_side(random_label,
                                       short_side_len=10,
                                       img_type='label')

    # Aspect ratio (2:1) and dtype must both survive the resize.
    assert resized.shape == (20, 10)
    assert resized.dtype == np.uint8
Exemplo n.º 7
0
def test_resize_img_by_short_side_rgb():
    """
    Downsample an 800x200 (height x width) 3-channel image with random
    content so that its short side is 10 px; the result should be 40x10x3
    and still uint8.
    """
    # Scaled Gaussian noise, clipped into the valid 8-bit range.
    noise = np.random.randn(800, 200, 3) * 255
    random_rgb = np.clip(noise, 0, 255).astype(np.uint8)

    resized = resize_img_by_short_side(random_rgb,
                                       short_side_len=10,
                                       img_type='rgb')

    # Aspect ratio (4:1), channel count and dtype must all be preserved.
    assert resized.shape == (40, 10, 3)
    assert resized.dtype == np.uint8