def test_read_partial_video_pts_unit_sec(self, start, offset):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

        lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit='sec')
        s_data = data[start:(start + offset)]
        assert len(lv) == offset
        assert_equal(s_data, lv)

        with av.open(f_name) as container:
            stream = container.streams[0]
            lv, _, _ = io.read_video(
                f_name,
                int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base,
                pts[7],
                pts_unit='sec',
            )
            if get_video_backend() == "pyav":
                # the "video_reader" backend doesn't decode the closest earlier frame
                # when the given start pts doesn't match any frame pts, so only
                # check the pyav backend here
                assert len(lv) == 4
                assert_equal(data[4:8], lv)
def test_anchor_generator(self):
    images = torch.randn(2, 3, 15, 15)
    features = self.get_features(images)
    image_shapes = [i.shape[-2:] for i in images]
    images = ImageList(images, image_shapes)

    model = self._init_test_anchor_generator()
    model.eval()
    anchors = model(images, features)

    # Estimate the number of target anchors
    grid_sizes = [f.shape[-2:] for f in features]
    num_anchors_estimated = 0
    for sizes, num_anchors_per_loc in zip(grid_sizes, model.num_anchors_per_location()):
        num_anchors_estimated += sizes[0] * sizes[1] * num_anchors_per_loc

    anchors_output = torch.tensor([
        [-5.0, -5.0, 5.0, 5.0],
        [0.0, -5.0, 10.0, 5.0],
        [5.0, -5.0, 15.0, 5.0],
        [-5.0, 0.0, 5.0, 10.0],
        [0.0, 0.0, 10.0, 10.0],
        [5.0, 0.0, 15.0, 10.0],
        [-5.0, 5.0, 5.0, 15.0],
        [0.0, 5.0, 10.0, 15.0],
        [5.0, 5.0, 15.0, 15.0],
    ])

    assert num_anchors_estimated == 9
    assert len(anchors) == 2
    assert tuple(anchors[0].shape) == (9, 4)
    assert tuple(anchors[1].shape) == (9, 4)
    assert_equal(anchors[0], anchors_output)
    assert_equal(anchors[1], anchors_output)
def test_features_image(self, p):
    input, expected = self.input_expected_image_tensor(p)
    transform = transforms.RandomVerticalFlip(p=p)

    actual = transform(features.Image(input))

    assert_equal(features.Image(expected), actual)
def test_pil_image(self, p):
    input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
    transform = transforms.RandomVerticalFlip(p=p)

    actual = transform(to_pil_image(input))

    assert_equal(expected, pil_to_tensor(actual))
def test_features_segmentation_mask(self, p):
    input, expected = self.input_expected_image_tensor(p)
    transform = transforms.RandomVerticalFlip(p=p)

    actual = transform(features.SegmentationMask(input))

    assert_equal(features.SegmentationMask(expected), actual)
def test_simple_tensor(self, p):
    input, expected = self.input_expected_image_tensor(p)
    transform = transforms.RandomVerticalFlip(p=p)

    actual = transform(input)

    assert_equal(expected, actual)
def test_write_video_with_audio(self):
    f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
    video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec")

    with get_tmp_dir() as tmpdir:
        out_f_name = os.path.join(tmpdir, "testing.mp4")
        io.video.write_video(
            out_f_name,
            video_tensor,
            round(info["video_fps"]),
            video_codec="libx264rgb",
            options={'crf': '0'},
            audio_array=audio_tensor,
            audio_fps=info["audio_fps"],
            audio_codec="aac",
        )

        out_video_tensor, out_audio_tensor, out_info = io.read_video(
            out_f_name, pts_unit="sec")

        assert info["video_fps"] == out_info["video_fps"]
        assert_equal(video_tensor, out_video_tensor)

        audio_stream = av.open(f_name).streams.audio[0]
        out_audio_stream = av.open(out_f_name).streams.audio[0]

        assert info["audio_fps"] == out_info["audio_fps"]
        assert audio_stream.rate == out_audio_stream.rate
        assert pytest.approx(out_audio_stream.frames, rel=0.0, abs=1) == audio_stream.frames
        assert audio_stream.frame_size == out_audio_stream.frame_size
def test_uniform_clip_sampler_insufficient_clips(self, tmpdir):
    video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
    video_clips = VideoClips(video_list, 5, 5)
    sampler = UniformClipSampler(video_clips, 3)
    assert len(sampler) == 3 * 3
    indices = torch.tensor(list(iter(sampler)))
    assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
def test_square_rotations(self, device, height, width, dt, angle, config, fn):
    # Test rotation
    tensor, pil_img = _create_data(height, width, device=device)

    if dt == torch.float16 and device == "cpu":
        # skip float16 on CPU case
        return

    if dt is not None:
        tensor = tensor.to(dtype=dt)

    out_pil_img = F.affine(
        pil_img, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST
    )
    out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))).to(device)

    out_tensor = fn(
        tensor, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST
    )
    if config is not None:
        assert_equal(torch.rot90(tensor, **config), out_tensor)

    if out_tensor.dtype != torch.uint8:
        out_tensor = out_tensor.to(torch.uint8)

    num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
    ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
    # Tolerance: less than 6% of different pixels
    assert ratio_diff_pixels < 0.06, "{}\n{} vs \n{}".format(
        ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
    )
def test_read_interlaced_png():
    imgs = list(get_images(INTERLACED_PNG, ".png"))
    with Image.open(imgs[0]) as im1, Image.open(imgs[1]) as im2:
        assert im1.info.get("interlace") is not im2.info.get("interlace")
    img1 = read_image(imgs[0])
    img2 = read_image(imgs[1])
    assert_equal(img1, img2)
def test_x_crop(fn, method, out_length, size, device):
    meth_kwargs = fn_kwargs = {"size": size}
    scripted_fn = torch.jit.script(fn)
    tensor, pil_img = _create_data(height=20, width=20, device=device)

    transformed_t_list = fn(tensor, **fn_kwargs)
    transformed_p_list = fn(pil_img, **fn_kwargs)
    assert len(transformed_t_list) == len(transformed_p_list)
    assert len(transformed_t_list) == out_length
    for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list):
        _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img)

    transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs)
    assert len(transformed_t_list) == len(transformed_t_list_script)
    assert len(transformed_t_list_script) == out_length
    for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script):
        assert_equal(transformed_tensor, transformed_tensor_script)

    # test for class interface
    fn = method(**meth_kwargs)
    scripted_fn = torch.jit.script(fn)
    output = scripted_fn(tensor)
    assert len(output) == len(transformed_t_list_script)

    # test on batch of tensors
    batch_tensors = _create_data_batch(height=23, width=34, channels=3, num_samples=4, device=device)
    torch.manual_seed(12)
    transformed_batch_list = fn(batch_tensors)

    for i in range(len(batch_tensors)):
        img_tensor = batch_tensors[i, ...]
        torch.manual_seed(12)
        transformed_img_list = fn(img_tensor)
        for transformed_img, transformed_batch in zip(transformed_img_list, transformed_batch_list):
            assert_equal(transformed_img, transformed_batch[i, ...])
def test_rotate_interpolation_type(self):
    tensor, _ = _create_data(26, 26)
    # assert changed type warning
    with pytest.warns(UserWarning, match=r"Argument interpolation should be of type InterpolationMode"):
        res1 = F.rotate(tensor, 45, interpolation=2)
        res2 = F.rotate(tensor, 45, interpolation=BILINEAR)
        assert_equal(res1, res2)
def test_random_apply(device):
    tensor, _ = _create_data(26, 34, device=device)
    tensor = tensor.to(dtype=torch.float32) / 255.0

    transforms = T.RandomApply([
        T.RandomHorizontalFlip(),
        T.ColorJitter(),
    ], p=0.4)
    s_transforms = T.RandomApply(torch.nn.ModuleList([
        T.RandomHorizontalFlip(),
        T.ColorJitter(),
    ]), p=0.4)
    scripted_fn = torch.jit.script(s_transforms)

    torch.manual_seed(12)
    transformed_tensor = transforms(tensor)
    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor, transformed_tensor_script, msg="{}".format(transforms))

    if device == "cpu":
        # Can't check this twice, otherwise
        # "Can't redefine method: forward on class: __torch__.torchvision.transforms.transforms.RandomApply"
        transforms = T.RandomApply([
            T.ColorJitter(),
        ], p=0.3)
        with pytest.raises(RuntimeError, match="Module 'RandomApply' has no attribute 'transforms'"):
            torch.jit.script(transforms)
def test_rotate_deprecation_resample(self):
    tensor, _ = _create_data(26, 26)
    # assert deprecation warning and non-BC
    with pytest.warns(UserWarning, match=r"Argument resample is deprecated and will be removed"):
        res1 = F.rotate(tensor, 45, resample=2)
        res2 = F.rotate(tensor, 45, interpolation=BILINEAR)
        assert_equal(res1, res2)
def test_forward_negative_sample_ssd(self):
    model = torchvision.models.detection.ssd300_vgg16(num_classes=2, pretrained_backbone=False)
    images, targets = self._make_empty_sample()
    loss_dict = model(images, targets)

    assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0))
def test_encode_jpeg_reference(img_path):
    # This test is *wrong*.
    # It compares a torchvision-encoded jpeg with a PIL-encoded jpeg (the
    # reference), but it starts encoding the torchvision version from an image
    # that comes from decode_jpeg, which can yield different results from
    # pil.decode (see test_decode... which uses a high tolerance).
    # Instead, we should start encoding from the exact same decoded image, for
    # a valid comparison. This is done in test_encode_jpeg, but unfortunately
    # these more correct tests fail on windows (probably because of a
    # difference in libjpeg between torchvision and PIL).
    # FIXME: make the correct tests pass on windows and remove this.
    dirname = os.path.dirname(img_path)
    filename, _ = os.path.splitext(os.path.basename(img_path))
    write_folder = os.path.join(dirname, 'jpeg_write')
    expected_file = os.path.join(write_folder, '{0}_pil.jpg'.format(filename))
    img = decode_jpeg(read_file(img_path))

    with open(expected_file, 'rb') as f:
        pil_bytes = f.read()
    pil_bytes = torch.as_tensor(list(pil_bytes), dtype=torch.uint8)
    for src_img in [img, img.contiguous()]:
        # PIL sets jpeg quality to 75 by default
        jpeg_bytes = encode_jpeg(src_img, quality=75)
        assert_equal(jpeg_bytes, pil_bytes)
def test_normalize_video(self, channels):
    def samples_from_standard_normal(tensor):
        p_value = stats.kstest(list(tensor.view(-1)), "norm", args=(0, 1)).pvalue
        return p_value > 0.0001

    random_state = random.getstate()
    random.seed(42)
    numFrames = random.randint(4, 128)
    height = random.randint(32, 256)
    width = random.randint(32, 256)
    mean = random.random()
    std = random.random()
    clip = torch.normal(mean, std, size=(channels, numFrames, height, width))
    mean = [clip[c].mean().item() for c in range(channels)]
    std = [clip[c].std().item() for c in range(channels)]
    normalized = transforms.NormalizeVideo(mean, std)(clip)
    assert samples_from_standard_normal(normalized)
    random.setstate(random_state)

    # Checking the optional in-place behaviour
    tensor = torch.rand((3, 128, 16, 16))
    tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
    assert_equal(tensor, tensor_inplace)

    transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True).__repr__()
def test_read_video_pts_unit_sec(self):
    with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
        lv, _, info = io.read_video(f_name, pts_unit='sec')

        assert_equal(data, lv)
        assert info["video_fps"] == 5
        assert info == {"video_fps": 5}
def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir):
    video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[25, 25, 25])
    video_clips = VideoClips(video_list, 5, 5)
    clip_sampler = UniformClipSampler(video_clips, 3)

    distributed_sampler_rank0 = DistributedSampler(
        clip_sampler,
        num_replicas=2,
        rank=0,
        group_size=3,
    )
    indices = torch.tensor(list(iter(distributed_sampler_rank0)))
    assert len(distributed_sampler_rank0) == 6
    assert_equal(indices, torch.tensor([0, 2, 4, 10, 12, 14]))

    distributed_sampler_rank1 = DistributedSampler(
        clip_sampler,
        num_replicas=2,
        rank=1,
        group_size=3,
    )
    indices = torch.tensor(list(iter(distributed_sampler_rank1)))
    assert len(distributed_sampler_rank1) == 6
    assert_equal(indices, torch.tensor([5, 7, 9, 0, 2, 4]))
def test_compose(device):
    tensor, _ = _create_data(26, 34, device=device)
    tensor = tensor.to(dtype=torch.float32) / 255.0

    transforms = T.Compose([
        T.CenterCrop(10),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    s_transforms = torch.nn.Sequential(*transforms.transforms)
    scripted_fn = torch.jit.script(s_transforms)

    torch.manual_seed(12)
    transformed_tensor = transforms(tensor)
    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor, transformed_tensor_script, msg=f"{transforms}")

    t = T.Compose([
        lambda x: x,
    ])
    with pytest.raises(RuntimeError, match="cannot call a value of type 'Tensor'"):
        torch.jit.script(t)
def _test_class_op(transform_cls, device, channels=3, meth_kwargs=None, test_exact_match=True, **match_kwargs):
    meth_kwargs = meth_kwargs or {}

    # test for class interface
    f = transform_cls(**meth_kwargs)
    scripted_fn = torch.jit.script(f)

    tensor, pil_img = _create_data(26, 34, channels, device=device)
    # set seed to reproduce the same transformation for tensor and PIL image
    torch.manual_seed(12)
    transformed_tensor = f(tensor)
    torch.manual_seed(12)
    transformed_pil_img = f(pil_img)
    if test_exact_match:
        _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img, **match_kwargs)
    else:
        _assert_approx_equal_tensor_to_pil(transformed_tensor.float(), transformed_pil_img, **match_kwargs)

    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor, transformed_tensor_script)

    batch_tensors = _create_data_batch(height=23, width=34, channels=channels, num_samples=4, device=device)
    _test_transform_vs_scripted_on_batch(f, scripted_fn, batch_tensors)

    with get_tmp_dir() as tmp_dir:
        scripted_fn.save(os.path.join(tmp_dir, f"t_{transform_cls.__name__}.pt"))
def test_pad(device, dt, pad, config):
    script_fn = torch.jit.script(F.pad)
    tensor, pil_img = _create_data(7, 8, device=device)
    batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device)

    if dt == torch.float16 and device == "cpu":
        # skip float16 on CPU case
        return

    if dt is not None:
        # This is a trivial cast to float of uint8 data to test all cases
        tensor = tensor.to(dt)
        batch_tensors = batch_tensors.to(dt)

    pad_tensor = F_t.pad(tensor, pad, **config)
    pad_pil_img = F_pil.pad(pil_img, pad, **config)

    pad_tensor_8b = pad_tensor
    # we need to cast to uint8 to compare with PIL image
    if pad_tensor_8b.dtype != torch.uint8:
        pad_tensor_8b = pad_tensor_8b.to(torch.uint8)

    _assert_equal_tensor_to_pil(pad_tensor_8b, pad_pil_img, msg="{}, {}".format(pad, config))

    if isinstance(pad, int):
        script_pad = [pad, ]
    else:
        script_pad = pad
    pad_tensor_script = script_fn(tensor, script_pad, **config)
    assert_equal(pad_tensor, pad_tensor_script, msg="{}, {}".format(pad, config))

    _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config)
def test_ten_crop(device):
    script_ten_crop = torch.jit.script(F.ten_crop)

    img_tensor, pil_img = _create_data(32, 34, device=device)

    cropped_pil_images = F.ten_crop(pil_img, [10, 11])

    cropped_tensors = F.ten_crop(img_tensor, [10, 11])
    for i in range(10):
        _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i])

    cropped_tensors = script_ten_crop(img_tensor, [10, 11])
    for i in range(10):
        _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i])

    batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device)
    tuple_transformed_batches = F.ten_crop(batch_tensors, [10, 11])
    for i in range(len(batch_tensors)):
        img_tensor = batch_tensors[i, ...]
        tuple_transformed_imgs = F.ten_crop(img_tensor, [10, 11])
        assert len(tuple_transformed_imgs) == len(tuple_transformed_batches)

        for j in range(len(tuple_transformed_imgs)):
            true_transformed_img = tuple_transformed_imgs[j]
            transformed_img = tuple_transformed_batches[j][i, ...]
            assert_equal(true_transformed_img, transformed_img)

    # scriptable function test
    s_tuple_transformed_batches = script_ten_crop(batch_tensors, [10, 11])
    for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches):
        assert_equal(transformed_batch, s_transformed_batch)
def test_transform_copy_targets(self):
    transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3))
    image = [torch.rand(3, 200, 300), torch.rand(3, 200, 200)]
    targets = [{"boxes": torch.rand(3, 4)}, {"boxes": torch.rand(2, 4)}]
    targets_copy = copy.deepcopy(targets)
    out = transform(image, targets)  # noqa: F841
    assert_equal(targets[0]["boxes"], targets_copy[0]["boxes"])
    assert_equal(targets[1]["boxes"], targets_copy[1]["boxes"])
def test_read_1_bit_png(shape):
    with get_tmp_dir() as root:
        image_path = os.path.join(root, f'test_{shape}.png')
        pixels = np.random.rand(*shape) > 0.5
        img = Image.fromarray(pixels)
        img.save(image_path)
        img1 = read_image(image_path)
        img2 = normalize_dimensions(torch.as_tensor(pixels * 255, dtype=torch.uint8))
        assert_equal(img1, img2)
def test_forward_negative_sample_retinanet(self):
    model = torchvision.models.detection.retinanet_resnet50_fpn(
        num_classes=2, min_size=100, max_size=100, pretrained_backbone=False
    )
    images, targets = self._make_empty_sample()
    loss_dict = model(images, targets)

    assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0))
def test_perspective_interpolation_warning():
    # assert changed type warning
    spoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    epoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    tensor = torch.randint(0, 256, (3, 26, 26))
    with pytest.warns(UserWarning, match="Argument interpolation should be of type InterpolationMode"):
        res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2)
        res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR)
        assert_equal(res1, res2)
def test_read_1_bit_png_consistency(shape, mode, tmpdir):
    np_rng = np.random.RandomState(0)
    image_path = os.path.join(tmpdir, f"test_{shape}.png")
    pixels = np_rng.rand(*shape) > 0.5
    img = Image.fromarray(pixels)
    img.save(image_path)
    img1 = read_image(image_path, mode)
    img2 = read_image(image_path, mode)
    assert_equal(img1, img2)
def test_forward_negative_sample_frcnn(self, name):
    model = torchvision.models.detection.__dict__[name](
        num_classes=2, min_size=100, max_size=100, pretrained_backbone=False
    )
    images, targets = self._make_empty_sample()
    loss_dict = model(images, targets)

    assert_equal(loss_dict["loss_box_reg"], torch.tensor(0.0))
    assert_equal(loss_dict["loss_rpn_box_reg"], torch.tensor(0.0))