def test_invalid_positional_args(self, device, dtype):
    bytes = get_dtype_size(dtype)
    in_bytes = SIZE * bytes
    # Empty array
    with self.assertRaisesRegex(ValueError,
                                r"both buffer length \(0\) and count"):
        empty = numpy.array([])
        torch.frombuffer(empty, dtype=dtype)
    # Count equals 0
    with self.assertRaisesRegex(ValueError,
                                r"both buffer length .* and count \(0\)"):
        self._run_test(SHAPE, dtype, count=0)
    # Negative offset, then offset equal to the total buffer length
    with self.assertRaisesRegex(ValueError,
                                rf"offset \(-{bytes} bytes\) must be"):
        self._run_test(SHAPE, dtype, first=-1)
    with self.assertRaisesRegex(ValueError,
                                rf"offset \({in_bytes} bytes\) must be .* "
                                rf"buffer length \({in_bytes} bytes\)"):
        self._run_test(SHAPE, dtype, first=SIZE)
    # Offset that is not a multiple of the element size, reading all elements
    if bytes > 1:
        offset = bytes - 1
        with self.assertRaisesRegex(ValueError,
                                    rf"buffer length \({in_bytes - offset} bytes\) after "
                                    rf"offset \({offset} bytes\) must be"):
            self._run_test(SHAPE, dtype, offset=offset)
    # Count too large for each valid first element
    for first in range(SIZE):
        count = SIZE - first + 1
        with self.assertRaisesRegex(ValueError,
                                    rf"requested buffer length \({count} \* {bytes} bytes\) "
                                    rf"after offset \({first * bytes} bytes\) must .*"
                                    rf"buffer length \({in_bytes} bytes\)"):
            self._run_test(SHAPE, dtype, count=count, first=first)
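For reference, a minimal standalone sketch of the offset and count semantics these assertions exercise (values illustrative):

import numpy
import torch

buf = numpy.arange(4, dtype=numpy.float32)  # 16-byte buffer (4 elements x 4 bytes)

# Valid: skip the first element (4-byte offset), read the remaining 3
t = torch.frombuffer(buf, dtype=torch.float32, count=3, offset=4)
assert t.tolist() == [1.0, 2.0, 3.0]

# Invalid: count=0 raises ValueError, as asserted in the test above
# torch.frombuffer(buf, dtype=torch.float32, count=0)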
Example #2
def read_image(img: Union[str, bytes, BytesIO, torch.Tensor],
               num_channels: Optional[int] = None) -> Optional[torch.Tensor]:
    """Returns a tensor in CHW format.

    If num_channels is not provided, the image is read in its unchanged format.
    Returns None if the image could not be read.
    """
    if isinstance(img, torch.Tensor):
        return img
    if isinstance(img, str):
        return read_image_from_str(img, num_channels)
    if isinstance(img, bytes):
        with BytesIO(img) as buffer:
            buffer_view = buffer.getbuffer()
            image_tensor = decode_image(
                torch.frombuffer(buffer_view, dtype=torch.uint8))
            del buffer_view
            return image_tensor
    if isinstance(img, BytesIO):
        buffer_view = img.getbuffer()
        try:
            image_tensor = decode_image(
                torch.frombuffer(buffer_view, dtype=torch.uint8))
            del buffer_view
            return image_tensor
        except RuntimeError as e:
            logger.warning(
                f"Encountered torchvision error while reading {img}: {e}")
    logger.warning(f"Could not read image {img}, unsupported type {type(img)}")
Example #3
def read_sn3_pascalvincent_tensor(path: str, strict: bool = True) -> torch.Tensor:
    """Read an SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh').
    The argument is the path to an uncompressed SN3 file.
    """
    # read
    with open(path, "rb") as f:
        data = f.read()
    # parse
    magic = get_int(data[0:4])
    nd = magic % 256
    ty = magic // 256
    assert 1 <= nd <= 3
    assert 8 <= ty <= 14
    torch_type = SN3_PASCALVINCENT_TYPEMAP[ty]
    s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]

    num_bytes_per_value = torch.iinfo(torch_type).bits // 8
    # The MNIST format uses the big endian byte order. If the system uses little endian byte order by default,
    # we need to reverse the bytes before we can read them with torch.frombuffer().
    needs_byte_reversal = sys.byteorder == "little" and num_bytes_per_value > 1
    parsed = torch.frombuffer(bytearray(data), dtype=torch_type, offset=(4 * (nd + 1)))
    if needs_byte_reversal:
        parsed = parsed.flip(0)

    assert parsed.shape[0] == np.prod(s) or not strict
    return parsed.view(*s)
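As a worked example of the header parsing above: the MNIST images file train-images-idx3-ubyte begins with magic number 2051 (0x00000803).

magic = 2051       # 0x00000803, first four bytes of the file
nd = magic % 256   # 3 -> three dimensions (count, rows, cols)
ty = magic // 256  # 8 -> maps to torch.uint8 in SN3_PASCALVINCENT_TYPEMAP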
Example #4
def _get_video_tensor(video_dir, video_file):
    """open a video file, and represent the video data by a PT tensor"""
    full_path = os.path.join(video_dir, video_file)

    assert os.path.exists(full_path), "File not found: %s" % full_path

    with open(full_path, "rb") as fp:
        video_tensor = torch.frombuffer(fp.read(), dtype=torch.uint8)

    return full_path, video_tensor
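Since fp.read() returns a read-only bytes object, this torch.frombuffer call warns that the buffer is not writable (compare test_non_writable_buffer below). A sketch of the bytearray-copy variant used in Example #3 above, which avoids the warning by making a writable copy:

with open(full_path, "rb") as fp:
    video_tensor = torch.frombuffer(bytearray(fp.read()), dtype=torch.uint8)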
Example #5
def test_encode_jpeg(img_path):
    img = read_image(img_path)

    pil_img = F.to_pil_image(img)
    buf = io.BytesIO()
    pil_img.save(buf, format="JPEG", quality=75)

    encoded_jpeg_pil = torch.frombuffer(buf.getvalue(), dtype=torch.uint8)

    for src_img in [img, img.contiguous()]:
        encoded_jpeg_torch = encode_jpeg(src_img, quality=75)
        assert_equal(encoded_jpeg_torch, encoded_jpeg_pil)
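For the reverse direction, torchvision.io also provides decode_jpeg; a minimal sketch using the buf from above:

from torchvision.io import decode_jpeg

decoded = decode_jpeg(torch.frombuffer(buf.getvalue(), dtype=torch.uint8))
assert decoded.dtype == torch.uint8 and decoded.ndim == 3  # CHW image tensor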
Example #6
def _probe_video_from_memory(video_data: torch.Tensor) -> VideoMetaData:
    """
    Probe a video in memory and return a VideoMetaData object with information
    about the video. This function is torchscriptable.
    """
    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)
    result = torch.ops.video_reader.probe_video_from_memory(video_data)
    vtimebase, vfps, vduration, atimebase, asample_rate, aduration = result
    info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate,
                      aduration)
    return info
Example #7
    def _run_test(self, shape, dtype, count=-1, first=0, offset=None, **kwargs):
        numpy_dtype = common.torch_to_numpy_dtype_dict[dtype]

        if offset is None:
            offset = first * get_dtype_size(dtype)

        numpy_original = common.make_tensor(shape, torch.device("cpu"), dtype).numpy()
        original = memoryview(numpy_original)
        # First call PyTorch's version in case of errors.
        # If this call exits successfully, the NumPy version must also do so.
        torch_frombuffer = torch.frombuffer(original, dtype=dtype, count=count, offset=offset, **kwargs)
        numpy_frombuffer = numpy.frombuffer(original, dtype=numpy_dtype, count=count, offset=offset)

        self.assertEqual(numpy_frombuffer, torch_frombuffer)
        self.assertEqual(numpy_frombuffer.__array_interface__["data"][0], torch_frombuffer.data_ptr())
        return (numpy_original, torch_frombuffer)
Example #8
def read_image_as_png(
        bytes_obj: Optional[bytes] = None,
        mode: ImageReadMode = ImageReadMode.UNCHANGED
) -> Optional[torch.Tensor]:
    """Reads image from bytes object from a PNG file."""
    try:
        with BytesIO(bytes_obj) as buffer:
            buffer_view = buffer.getbuffer()
            if len(buffer_view) == 0:
                del buffer_view
                raise Exception(
                    "Bytes object is empty. This could be due to a failed load from storage."
                )
            image = decode_image(torch.frombuffer(buffer_view,
                                                  dtype=torch.uint8),
                                 mode=mode)
            del buffer_view
            return image
    except Exception as e:
        warnings.warn(
            f"Failed to read image from PNG file. Original exception: {e}")
        return None
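A usage sketch (the path is illustrative; ImageReadMode comes from torchvision.io):

with open("icon.png", "rb") as f:
    png_tensor = read_image_as_png(f.read(), mode=ImageReadMode.RGB)
if png_tensor is None:
    print("decode failed; see the emitted warning")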
Example #9
def _read_video_timestamps_from_memory(
    video_data: torch.Tensor,
) -> Tuple[List[int], List[int], VideoMetaData]:
    """
    Decode all frames in the video. Only pts (presentation timestamp) is returned.
    The actual frame pixel data is not copied. Thus, read_video_timestamps(...)
    is much faster than read_video(...)
    """
    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)
    result = torch.ops.video_reader.read_video_from_memory(
        video_data,
        0,  # seek_frame_margin
        1,  # getPtsOnly
        1,  # read_video_stream
        0,  # video_width
        0,  # video_height
        0,  # video_min_dimension
        0,  # video_max_dimension
        0,  # video_start_pts
        -1,  # video_end_pts
        0,  # video_timebase_num
        1,  # video_timebase_den
        1,  # read_audio_stream
        0,  # audio_samples
        0,  # audio_channels
        0,  # audio_start_pts
        -1,  # audio_end_pts
        0,  # audio_timebase_num
        1,  # audio_timebase_den
    )
    _vframes, vframe_pts, vtimebase, vfps, vduration, _aframes, aframe_pts, atimebase, asample_rate, aduration = result
    info = _fill_info(vtimebase, vfps, vduration, atimebase, asample_rate, aduration)

    vframe_pts = vframe_pts.numpy().tolist()
    aframe_pts = aframe_pts.numpy().tolist()
    return vframe_pts, aframe_pts, info
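A sketch of calling it with raw bytes (the path is illustrative; the bytes are converted with torch.frombuffer internally):

with open("clip.mp4", "rb") as f:
    vframe_pts, aframe_pts, meta = _read_video_timestamps_from_memory(f.read())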
Example #10
def _read_image_buffer(v):
    # Read the bytes sent via the REST API and convert them to an image tensor
    # in [channels, height, width] format.
    byte_string = v.file.read()
    image = decode_image(torch.frombuffer(byte_string, dtype=torch.uint8))
    return image  # channels, height, width
Example #11
def test_byte_to_int(self):
    # -1 stored as a signed byte is 0xFF; on a little-endian machine the four
    # bytes FF 00 00 00 read back as the int32 value 255
    byte_array = numpy.array([-1, 0, 0, 0, -1, 0, 0, 0], dtype=numpy.byte)
    tensor = torch.frombuffer(byte_array, dtype=torch.int32)
    self.assertEqual(tensor.numel(), 2)
    # Assuming a little-endian machine
    self.assertSequenceEqual(tensor, [255, 255])
Example #12
def test_non_writable_buffer(self, device, dtype):
    numpy_arr = common.make_tensor((1,), device, dtype).numpy()
    byte_arr = numpy_arr.tobytes()  # bytes objects are read-only
    with self.assertWarnsOnceRegex(UserWarning,
                                   r"The given buffer is not writable."):
        torch.frombuffer(byte_arr, dtype=dtype)
Example #13
def test_not_a_buffer(self, device, dtype):
    with self.assertRaisesRegex(ValueError,
                                r"object does not implement Python buffer protocol."):
        torch.frombuffer([1, 2, 3, 4], dtype=dtype)
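For contrast with the failure above, a sketch with an object that does implement the buffer protocol:

data = bytearray(b"\x01\x00\x00\x00\x02\x00\x00\x00")  # writable 8-byte buffer
t = torch.frombuffer(data, dtype=torch.int32)
# Assuming a little-endian machine, as in test_byte_to_int above
assert t.tolist() == [1, 2]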
Example #14
def _read_video_from_memory(
    video_data: torch.Tensor,
    seek_frame_margin: float = 0.25,
    read_video_stream: int = 1,
    video_width: int = 0,
    video_height: int = 0,
    video_min_dimension: int = 0,
    video_max_dimension: int = 0,
    video_pts_range: Tuple[int, int] = (0, -1),
    video_timebase_numerator: int = 0,
    video_timebase_denominator: int = 1,
    read_audio_stream: int = 1,
    audio_samples: int = 0,
    audio_channels: int = 0,
    audio_pts_range: Tuple[int, int] = (0, -1),
    audio_timebase_numerator: int = 0,
    audio_timebase_denominator: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Reads a video from memory, returning both the video frames and the
    audio frames.
    This function is torchscriptable.

    Args:
    video_data (torch.Tensor of dtype torch.uint8, or python bytes):
        compressed video content
    seek_frame_margin (double, optional): seeking a frame in the stream is imprecise,
        so when video_start_pts is specified, we seek to a pts earlier by seek_frame_margin seconds
    read_video_stream (int, optional): whether to read the video stream. If yes, set to 1; otherwise, 0
    video_width/video_height/video_min_dimension/video_max_dimension (int): together decide
        the size of decoded frames:

            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the original frame resolution
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension = 0, keep the aspect ratio and resize the
                frame so that the shorter edge size is video_min_dimension
            - When video_width = 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension != 0, keep the aspect ratio and resize
                the frame so that the longer edge size is video_max_dimension
            - When video_width = 0, video_height = 0, video_min_dimension != 0,
                and video_max_dimension != 0, resize the frame so that the shorter
                edge size is video_min_dimension and the longer edge size is
                video_max_dimension. The aspect ratio may not be preserved
            - When video_width = 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that the frame height is video_height
            - When video_width != 0, video_height = 0, video_min_dimension = 0,
                and video_max_dimension = 0, keep the aspect ratio and resize
                the frame so that the frame width is video_width
            - When video_width != 0, video_height != 0, video_min_dimension = 0,
                and video_max_dimension = 0, resize the frame so that the frame
                width and height are video_width and video_height, respectively
    video_pts_range (Tuple[int, int], optional): the start and end presentation timestamps of the video stream
    video_timebase_numerator / video_timebase_denominator (int, optional): a rational
        number which denotes the timebase of the video stream
    read_audio_stream (int, optional): whether to read the audio stream. If yes, set to 1; otherwise, 0
    audio_samples (int, optional): audio sampling rate
    audio_channels (int, optional): number of audio channels
    audio_pts_range (Tuple[int, int], optional): the start and end presentation timestamps of the audio stream
    audio_timebase_numerator / audio_timebase_denominator (int, optional):
        a rational number which denotes the timebase of the audio stream

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of points and
            `K` is the number of channels
    """

    _validate_pts(video_pts_range)
    _validate_pts(audio_pts_range)

    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)

    result = torch.ops.video_reader.read_video_from_memory(
        video_data,
        seek_frame_margin,
        0,  # getPtsOnly
        read_video_stream,
        video_width,
        video_height,
        video_min_dimension,
        video_max_dimension,
        video_pts_range[0],
        video_pts_range[1],
        video_timebase_numerator,
        video_timebase_denominator,
        read_audio_stream,
        audio_samples,
        audio_channels,
        audio_pts_range[0],
        audio_pts_range[1],
        audio_timebase_numerator,
        audio_timebase_denominator,
    )

    vframes, _vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, atimebase, asample_rate, aduration = result

    if aframes.numel() > 0:
        # when audio stream is found
        aframes = _align_audio_frames(aframes, aframe_pts, audio_pts_range)

    return vframes, aframes
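A usage sketch (the path is illustrative): decode a clip held in memory, resizing frames so the shorter edge is 256 while keeping the aspect ratio:

with open("clip.mp4", "rb") as f:
    raw = f.read()
vframes, aframes = _read_video_from_memory(raw, video_min_dimension=256)
# vframes: Tensor[T, H, W, C]; aframes: Tensor[L, K], empty if no audio stream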