Example #1
File: test_io.py  Project: ofekp/vision
    def test_read_partial_video_pts_unit_sec(self, start, offset):
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit='sec')

            lv, _, _ = io.read_video(f_name,
                                     pts[start],
                                     pts[start + offset - 1],
                                     pts_unit='sec')
            s_data = data[start:(start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv)

            with av.open(f_name) as container:
                stream = container.streams[0]
                lv, _, _ = io.read_video(f_name,
                                         int(pts[4] *
                                             (1.0 / stream.time_base) + 1) *
                                         stream.time_base,
                                         pts[7],
                                         pts_unit='sec')
            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                assert len(lv) == 4
                assert_equal(data[4:8], lv)
Example #2
    def test_anchor_generator(self):
        images = torch.randn(2, 3, 15, 15)
        features = self.get_features(images)
        image_shapes = [i.shape[-2:] for i in images]
        images = ImageList(images, image_shapes)

        model = self._init_test_anchor_generator()
        model.eval()
        anchors = model(images, features)

        # Estimate the number of target anchors
        grid_sizes = [f.shape[-2:] for f in features]
        num_anchors_estimated = 0
        for sizes, num_anchors_per_loc in zip(
                grid_sizes, model.num_anchors_per_location()):
            num_anchors_estimated += sizes[0] * sizes[1] * num_anchors_per_loc

        anchors_output = torch.tensor([
            [-5.0, -5.0, 5.0, 5.0],
            [0.0, -5.0, 10.0, 5.0],
            [5.0, -5.0, 15.0, 5.0],
            [-5.0, 0.0, 5.0, 10.0],
            [0.0, 0.0, 10.0, 10.0],
            [5.0, 0.0, 15.0, 10.0],
            [-5.0, 5.0, 5.0, 15.0],
            [0.0, 5.0, 10.0, 15.0],
            [5.0, 5.0, 15.0, 15.0],
        ])

        assert num_anchors_estimated == 9
        assert len(anchors) == 2
        assert tuple(anchors[0].shape) == (9, 4)
        assert tuple(anchors[1].shape) == (9, 4)
        assert_equal(anchors[0], anchors_output)
        assert_equal(anchors[1], anchors_output)
Example #3
    def test_features_image(self, p):
        input, expected = self.input_expected_image_tensor(p)
        transform = transforms.RandomVerticalFlip(p=p)

        actual = transform(features.Image(input))

        assert_equal(features.Image(expected), actual)
Example #4
    def test_pil_image(self, p):
        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
        transform = transforms.RandomVerticalFlip(p=p)

        actual = transform(to_pil_image(input))

        assert_equal(expected, pil_to_tensor(actual))
Example #5
    def test_features_segmentation_mask(self, p):
        input, expected = self.input_expected_image_tensor(p)
        transform = transforms.RandomVerticalFlip(p=p)

        actual = transform(features.SegmentationMask(input))

        assert_equal(features.SegmentationMask(expected), actual)
Example #6
    def test_simple_tensor(self, p):
        input, expected = self.input_expected_image_tensor(p)
        transform = transforms.RandomVerticalFlip(p=p)

        actual = transform(input)

        assert_equal(expected, actual)
Example #7
File: test_io.py  Project: ofekp/vision
    def test_write_video_with_audio(self):
        f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
        video_tensor, audio_tensor, info = io.read_video(f_name,
                                                         pts_unit="sec")

        with get_tmp_dir() as tmpdir:
            out_f_name = os.path.join(tmpdir, "testing.mp4")
            io.video.write_video(
                out_f_name,
                video_tensor,
                round(info["video_fps"]),
                video_codec="libx264rgb",
                options={'crf': '0'},
                audio_array=audio_tensor,
                audio_fps=info["audio_fps"],
                audio_codec="aac",
            )

            out_video_tensor, out_audio_tensor, out_info = io.read_video(
                out_f_name, pts_unit="sec")

            assert info["video_fps"] == out_info["video_fps"]
            assert_equal(video_tensor, out_video_tensor)

            audio_stream = av.open(f_name).streams.audio[0]
            out_audio_stream = av.open(out_f_name).streams.audio[0]

            assert info["audio_fps"] == out_info["audio_fps"]
            assert audio_stream.rate == out_audio_stream.rate
            assert pytest.approx(out_audio_stream.frames, rel=0.0,
                                 abs=1) == audio_stream.frames
            assert audio_stream.frame_size == out_audio_stream.frame_size
Example #8
    def test_uniform_clip_sampler_insufficient_clips(self, tmpdir):
        video_list = get_list_of_videos(tmpdir, num_videos=3, sizes=[10, 25, 25])
        video_clips = VideoClips(video_list, 5, 5)
        sampler = UniformClipSampler(video_clips, 3)
        assert len(sampler) == 3 * 3
        indices = torch.tensor(list(iter(sampler)))
        assert_equal(indices, torch.tensor([0, 0, 1, 2, 4, 6, 7, 9, 11]))
Example #9
    def test_square_rotations(self, device, height, width, dt, angle, config, fn):
        # 2) Test rotation
        tensor, pil_img = _create_data(height, width, device=device)

        if dt == torch.float16 and device == "cpu":
            # skip float16 on CPU case
            return

        if dt is not None:
            tensor = tensor.to(dtype=dt)

        out_pil_img = F.affine(
            pil_img, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST
        )
        out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))).to(device)

        out_tensor = fn(
            tensor, angle=angle, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST
        )
        if config is not None:
            assert_equal(torch.rot90(tensor, **config), out_tensor)

        if out_tensor.dtype != torch.uint8:
            out_tensor = out_tensor.to(torch.uint8)

        num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
        ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
        # Tolerance : less than 6% of different pixels
        assert ratio_diff_pixels < 0.06, "{}\n{} vs \n{}".format(
            ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]
        )
Example #10
def test_read_interlaced_png():
    imgs = list(get_images(INTERLACED_PNG, ".png"))
    with Image.open(imgs[0]) as im1, Image.open(imgs[1]) as im2:
        assert not (im1.info.get("interlace") is im2.info.get("interlace"))
    img1 = read_image(imgs[0])
    img2 = read_image(imgs[1])
    assert_equal(img1, img2)
Example #11
def test_x_crop(fn, method, out_length, size, device):
    meth_kwargs = fn_kwargs = {"size": size}
    scripted_fn = torch.jit.script(fn)

    tensor, pil_img = _create_data(height=20, width=20, device=device)
    transformed_t_list = fn(tensor, **fn_kwargs)
    transformed_p_list = fn(pil_img, **fn_kwargs)
    assert len(transformed_t_list) == len(transformed_p_list)
    assert len(transformed_t_list) == out_length
    for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list):
        _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img)

    transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs)
    assert len(transformed_t_list) == len(transformed_t_list_script)
    assert len(transformed_t_list_script) == out_length
    for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script):
        assert_equal(transformed_tensor, transformed_tensor_script)

    # test for class interface
    fn = method(**meth_kwargs)
    scripted_fn = torch.jit.script(fn)
    output = scripted_fn(tensor)
    assert len(output) == len(transformed_t_list_script)

    # test on batch of tensors
    batch_tensors = _create_data_batch(height=23, width=34, channels=3, num_samples=4, device=device)
    torch.manual_seed(12)
    transformed_batch_list = fn(batch_tensors)

    for i in range(len(batch_tensors)):
        img_tensor = batch_tensors[i, ...]
        torch.manual_seed(12)
        transformed_img_list = fn(img_tensor)
        for transformed_img, transformed_batch in zip(transformed_img_list, transformed_batch_list):
            assert_equal(transformed_img, transformed_batch[i, ...])
Example #12
    def test_rotate_interpolation_type(self):
        tensor, _ = _create_data(26, 26)
        # assert changed type warning
        with pytest.warns(UserWarning, match=r"Argument interpolation should be of type InterpolationMode"):
            res1 = F.rotate(tensor, 45, interpolation=2)
            res2 = F.rotate(tensor, 45, interpolation=BILINEAR)
            assert_equal(res1, res2)
Example #13
def test_random_apply(device):
    tensor, _ = _create_data(26, 34, device=device)
    tensor = tensor.to(dtype=torch.float32) / 255.0

    transforms = T.RandomApply([
        T.RandomHorizontalFlip(),
        T.ColorJitter(),
    ], p=0.4)
    s_transforms = T.RandomApply(torch.nn.ModuleList([
        T.RandomHorizontalFlip(),
        T.ColorJitter(),
    ]), p=0.4)

    scripted_fn = torch.jit.script(s_transforms)
    torch.manual_seed(12)
    transformed_tensor = transforms(tensor)
    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor,
                 transformed_tensor_script,
                 msg="{}".format(transforms))

    if device == "cpu":
        # Can't check this twice, otherwise
        # "Can't redefine method: forward on class: __torch__.torchvision.transforms.transforms.RandomApply"
        transforms = T.RandomApply([
            T.ColorJitter(),
        ], p=0.3)
        with pytest.raises(
                RuntimeError,
                match="Module 'RandomApply' has no attribute 'transforms'"):
            torch.jit.script(transforms)
Example #14
    def test_rotate_deprecation_resample(self):
        tensor, _ = _create_data(26, 26)
        # assert deprecation warning and non-BC
        with pytest.warns(UserWarning, match=r"Argument resample is deprecated and will be removed"):
            res1 = F.rotate(tensor, 45, resample=2)
            res2 = F.rotate(tensor, 45, interpolation=BILINEAR)
            assert_equal(res1, res2)
Example #15
    def test_forward_negative_sample_ssd(self):
        model = torchvision.models.detection.ssd300_vgg16(num_classes=2, pretrained_backbone=False)

        images, targets = self._make_empty_sample()
        loss_dict = model(images, targets)

        assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0))
Example #16
def test_encode_jpeg_reference(img_path):
    # This test is *wrong*.
    # It compares a torchvision-encoded jpeg with a PIL-encoded jpeg (the reference), but it
    # starts encoding the torchvision version from an image that comes from
    # decode_jpeg, which can yield different results from pil.decode (see
    # test_decode... which uses a high tolerance).
    # Instead, we should start encoding from the exact same decoded image, for a
    # valid comparison. This is done in test_encode_jpeg, but unfortunately
    # these more correct tests fail on windows (probably because of a difference
    # in libjpeg) between torchvision and PIL.
    # FIXME: make the correct tests pass on windows and remove this.
    dirname = os.path.dirname(img_path)
    filename, _ = os.path.splitext(os.path.basename(img_path))
    write_folder = os.path.join(dirname, 'jpeg_write')
    expected_file = os.path.join(
        write_folder, '{0}_pil.jpg'.format(filename))
    img = decode_jpeg(read_file(img_path))

    with open(expected_file, 'rb') as f:
        pil_bytes = f.read()
        pil_bytes = torch.as_tensor(list(pil_bytes), dtype=torch.uint8)
    for src_img in [img, img.contiguous()]:
        # PIL sets jpeg quality to 75 by default
        jpeg_bytes = encode_jpeg(src_img, quality=75)
        assert_equal(jpeg_bytes, pil_bytes)
Example #17
    def test_normalize_video(self, channels):
        def samples_from_standard_normal(tensor):
            p_value = stats.kstest(list(tensor.view(-1)), "norm",
                                   args=(0, 1)).pvalue
            return p_value > 0.0001

        random_state = random.getstate()
        random.seed(42)

        numFrames = random.randint(4, 128)
        height = random.randint(32, 256)
        width = random.randint(32, 256)
        mean = random.random()
        std = random.random()
        clip = torch.normal(mean,
                            std,
                            size=(channels, numFrames, height, width))
        mean = [clip[c].mean().item() for c in range(channels)]
        std = [clip[c].std().item() for c in range(channels)]
        normalized = transforms.NormalizeVideo(mean, std)(clip)
        assert samples_from_standard_normal(normalized)
        random.setstate(random_state)

        # Checking the optional in-place behaviour
        tensor = torch.rand((3, 128, 16, 16))
        tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5),
                                                   (0.5, 0.5, 0.5),
                                                   inplace=True)(tensor)
        assert_equal(tensor, tensor_inplace)

        transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5),
                                  inplace=True).__repr__()
Example #18
File: test_io.py  Project: ofekp/vision
    def test_read_video_pts_unit_sec(self):
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            lv, _, info = io.read_video(f_name, pts_unit='sec')

            assert_equal(data, lv)
            assert info["video_fps"] == 5
            assert info == {"video_fps": 5}
Example #19
    def test_distributed_sampler_and_uniform_clip_sampler(self, tmpdir):
        video_list = get_list_of_videos(tmpdir,
                                        num_videos=3,
                                        sizes=[25, 25, 25])
        video_clips = VideoClips(video_list, 5, 5)
        clip_sampler = UniformClipSampler(video_clips, 3)

        distributed_sampler_rank0 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=0,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank0)))
        assert len(distributed_sampler_rank0) == 6
        assert_equal(indices, torch.tensor([0, 2, 4, 10, 12, 14]))

        distributed_sampler_rank1 = DistributedSampler(
            clip_sampler,
            num_replicas=2,
            rank=1,
            group_size=3,
        )
        indices = torch.tensor(list(iter(distributed_sampler_rank1)))
        assert len(distributed_sampler_rank1) == 6
        assert_equal(indices, torch.tensor([5, 7, 9, 0, 2, 4]))
Example #20
def test_compose(device):
    tensor, _ = _create_data(26, 34, device=device)
    tensor = tensor.to(dtype=torch.float32) / 255.0
    transforms = T.Compose(
        [
            T.CenterCrop(10),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )
    s_transforms = torch.nn.Sequential(*transforms.transforms)

    scripted_fn = torch.jit.script(s_transforms)
    torch.manual_seed(12)
    transformed_tensor = transforms(tensor)
    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor, transformed_tensor_script, msg=f"{transforms}")

    t = T.Compose(
        [
            lambda x: x,
        ]
    )
    with pytest.raises(RuntimeError, match="cannot call a value of type 'Tensor'"):
        torch.jit.script(t)
Example #21
def _test_class_op(transform_cls, device, channels=3, meth_kwargs=None, test_exact_match=True, **match_kwargs):
    meth_kwargs = meth_kwargs or {}

    # test for class interface
    f = transform_cls(**meth_kwargs)
    scripted_fn = torch.jit.script(f)

    tensor, pil_img = _create_data(26, 34, channels, device=device)
    # set seed to reproduce the same transformation for tensor and PIL image
    torch.manual_seed(12)
    transformed_tensor = f(tensor)
    torch.manual_seed(12)
    transformed_pil_img = f(pil_img)
    if test_exact_match:
        _assert_equal_tensor_to_pil(transformed_tensor, transformed_pil_img, **match_kwargs)
    else:
        _assert_approx_equal_tensor_to_pil(transformed_tensor.float(), transformed_pil_img, **match_kwargs)

    torch.manual_seed(12)
    transformed_tensor_script = scripted_fn(tensor)
    assert_equal(transformed_tensor, transformed_tensor_script)

    batch_tensors = _create_data_batch(height=23, width=34, channels=channels, num_samples=4, device=device)
    _test_transform_vs_scripted_on_batch(f, scripted_fn, batch_tensors)

    with get_tmp_dir() as tmp_dir:
        scripted_fn.save(os.path.join(tmp_dir, f"t_{transform_cls.__name__}.pt"))
Example #22
def test_pad(device, dt, pad, config):
    script_fn = torch.jit.script(F.pad)
    tensor, pil_img = _create_data(7, 8, device=device)
    batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device)

    if dt == torch.float16 and device == "cpu":
        # skip float16 on CPU case
        return

    if dt is not None:
        # This is a trivial cast to float of uint8 data to test all cases
        tensor = tensor.to(dt)
        batch_tensors = batch_tensors.to(dt)

    pad_tensor = F_t.pad(tensor, pad, **config)
    pad_pil_img = F_pil.pad(pil_img, pad, **config)

    pad_tensor_8b = pad_tensor
    # we need to cast to uint8 to compare with PIL image
    if pad_tensor_8b.dtype != torch.uint8:
        pad_tensor_8b = pad_tensor_8b.to(torch.uint8)

    _assert_equal_tensor_to_pil(pad_tensor_8b, pad_pil_img, msg="{}, {}".format(pad, config))

    if isinstance(pad, int):
        script_pad = [pad, ]
    else:
        script_pad = pad
    pad_tensor_script = script_fn(tensor, script_pad, **config)
    assert_equal(pad_tensor, pad_tensor_script, msg="{}, {}".format(pad, config))

    _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config)
Example #23
def test_ten_crop(device):
    script_ten_crop = torch.jit.script(F.ten_crop)

    img_tensor, pil_img = _create_data(32, 34, device=device)

    cropped_pil_images = F.ten_crop(pil_img, [10, 11])

    cropped_tensors = F.ten_crop(img_tensor, [10, 11])
    for i in range(10):
        _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i])

    cropped_tensors = script_ten_crop(img_tensor, [10, 11])
    for i in range(10):
        _assert_equal_tensor_to_pil(cropped_tensors[i], cropped_pil_images[i])

    batch_tensors = _create_data_batch(16, 18, num_samples=4, device=device)
    tuple_transformed_batches = F.ten_crop(batch_tensors, [10, 11])
    for i in range(len(batch_tensors)):
        img_tensor = batch_tensors[i, ...]
        tuple_transformed_imgs = F.ten_crop(img_tensor, [10, 11])
        assert len(tuple_transformed_imgs) == len(tuple_transformed_batches)

        for j in range(len(tuple_transformed_imgs)):
            true_transformed_img = tuple_transformed_imgs[j]
            transformed_img = tuple_transformed_batches[j][i, ...]
            assert_equal(true_transformed_img, transformed_img)

    # scriptable function test
    s_tuple_transformed_batches = script_ten_crop(batch_tensors, [10, 11])
    for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches):
        assert_equal(transformed_batch, s_transformed_batch)
Example #24
    def test_transform_copy_targets(self):
        transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3))
        image = [torch.rand(3, 200, 300), torch.rand(3, 200, 200)]
        targets = [{"boxes": torch.rand(3, 4)}, {"boxes": torch.rand(2, 4)}]
        targets_copy = copy.deepcopy(targets)
        out = transform(image, targets)  # noqa: F841
        assert_equal(targets[0]["boxes"], targets_copy[0]["boxes"])
        assert_equal(targets[1]["boxes"], targets_copy[1]["boxes"])
Example #25
def test_read_1_bit_png_consistency(shape, mode):
    with get_tmp_dir() as root:
        image_path = os.path.join(root, f'test_{shape}.png')
        pixels = np.random.rand(*shape) > 0.5
        img = Image.fromarray(pixels)
        img.save(image_path)
        img1 = read_image(image_path, mode)
        img2 = read_image(image_path, mode)
        assert_equal(img1, img2)
Example #26
def test_read_1_bit_png(shape):
    with get_tmp_dir() as root:
        image_path = os.path.join(root, f'test_{shape}.png')
        pixels = np.random.rand(*shape) > 0.5
        img = Image.fromarray(pixels)
        img.save(image_path)
        img1 = read_image(image_path)
        img2 = normalize_dimensions(torch.as_tensor(pixels * 255, dtype=torch.uint8))
        assert_equal(img1, img2)
Example #27
    def test_forward_negative_sample_retinanet(self):
        model = torchvision.models.detection.retinanet_resnet50_fpn(
            num_classes=2, min_size=100, max_size=100, pretrained_backbone=False
        )

        images, targets = self._make_empty_sample()
        loss_dict = model(images, targets)

        assert_equal(loss_dict["bbox_regression"], torch.tensor(0.0))
Example #28
def test_perspective_interpolation_warning():
    # assert changed type warning
    spoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    epoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    tensor = torch.randint(0, 256, (3, 26, 26))
    with pytest.warns(UserWarning, match="Argument interpolation should be of type InterpolationMode"):
        res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2)
        res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR)
        assert_equal(res1, res2)
Example #29
def test_read_1_bit_png_consistency(shape, mode, tmpdir):
    np_rng = np.random.RandomState(0)
    image_path = os.path.join(tmpdir, f"test_{shape}.png")
    pixels = np_rng.rand(*shape) > 0.5
    img = Image.fromarray(pixels)
    img.save(image_path)
    img1 = read_image(image_path, mode)
    img2 = read_image(image_path, mode)
    assert_equal(img1, img2)
Example #30
    def test_forward_negative_sample_frcnn(self, name):
        model = torchvision.models.detection.__dict__[name](
            num_classes=2, min_size=100, max_size=100, pretrained_backbone=False
        )

        images, targets = self._make_empty_sample()
        loss_dict = model(images, targets)

        assert_equal(loss_dict["loss_box_reg"], torch.tensor(0.0))
        assert_equal(loss_dict["loss_rpn_box_reg"], torch.tensor(0.0))