def test_invalid_positional_args(self, device, dtype):
    # Exercises the argument validation of torch.frombuffer: every case below
    # pairs the expected ValueError message pattern with the call that must
    # trigger it.
    bytes = get_dtype_size(dtype)
    in_bytes = SIZE * bytes

    # Case 1: the source buffer itself has zero length.
    empty_buffer_pattern = r"both buffer length \(0\) and count"
    with self.assertRaisesRegex(ValueError, empty_buffer_pattern):
        torch.frombuffer(numpy.array([]), dtype=dtype)

    # Case 2: an explicit element count of zero.
    zero_count_pattern = r"both buffer length .* and count \(0\)"
    with self.assertRaisesRegex(ValueError, zero_count_pattern):
        self._run_test(SHAPE, dtype, count=0)

    # Case 3: offset out of range, first below zero ...
    negative_offset_pattern = rf"offset \(-{bytes} bytes\) must be"
    with self.assertRaisesRegex(ValueError, negative_offset_pattern):
        self._run_test(SHAPE, dtype, first=-1)

    # ... then equal to the full buffer length.
    oversized_offset_pattern = (
        rf"offset \({in_bytes} bytes\) must be .* "
        rf"buffer length \({in_bytes} bytes\)"
    )
    with self.assertRaisesRegex(ValueError, oversized_offset_pattern):
        self._run_test(SHAPE, dtype, first=SIZE)

    # Case 4: an offset that is not a multiple of the element size. This is
    # only expressible when an element spans more than one byte.
    if bytes > 1:
        offset = bytes - 1
        misaligned_pattern = (
            rf"buffer length \({in_bytes - offset} bytes\) after "
            rf"offset \({offset} bytes\) must be"
        )
        with self.assertRaisesRegex(ValueError, misaligned_pattern):
            self._run_test(SHAPE, dtype, offset=offset)

    # Case 5: for every valid starting element, ask for one element more
    # than what remains in the buffer.
    for first in range(SIZE):
        count = SIZE - first + 1
        overrun_pattern = (
            rf"requested buffer length \({count} \* {bytes} bytes\) "
            rf"after offset \({first * bytes} bytes\) must .*"
            rf"buffer length \({in_bytes} bytes\)"
        )
        with self.assertRaisesRegex(ValueError, overrun_pattern):
            self._run_test(SHAPE, dtype, count=count, first=first)
def read_image(img: Union[str, bytes, BytesIO, torch.Tensor], num_channels: Optional[int] = None) -> Optional[torch.Tensor]:
    """Return an image tensor for ``img``, or None if it could not be read.

    Args:
        img: The image source. A ``torch.Tensor`` is returned unchanged, a
            ``str`` is handed to ``read_image_from_str``, and ``bytes`` /
            ``BytesIO`` contents are decoded with ``decode_image``.
        num_channels: Forwarded to ``read_image_from_str`` for ``str``
            inputs only; it is not used for the other input types.

    Returns:
        The image tensor (described as CHW by the original documentation),
        or None when decoding raises ``RuntimeError`` or ``img`` has an
        unsupported type. A warning is logged in both of those cases.
    """
    if isinstance(img, torch.Tensor):
        return img
    if isinstance(img, str):
        return read_image_from_str(img, num_channels)

    if isinstance(img, bytes):
        # A bytearray copy gives torch.frombuffer a writable buffer and,
        # unlike a temporary BytesIO, has nothing to close: closing a BytesIO
        # while a view on it is still alive raises BufferError, which would
        # hide the real decoding error.
        raw = bytearray(img)
        # Describe the payload by size instead of dumping it into the log.
        source = f"<{len(img)} bytes>"
    elif isinstance(img, BytesIO):
        raw = img.getbuffer()
        source = str(img)
    else:
        logger.warning(f"Could not read image {img}, unsupported type {type(img)}")
        return None

    try:
        return decode_image(torch.frombuffer(raw, dtype=torch.uint8))
    except RuntimeError as e:
        # Return right here so a decode failure is not additionally reported
        # as an "unsupported type".
        logger.warning(f"Encountered torchvision error while reading {source}: {e}")
        return None
    finally:
        # Drop our reference to the view so a caller-owned BytesIO can be
        # resized or closed again.
        del raw
def read_sn3_pascalvincent_tensor(path: str, strict: bool = True) -> torch.Tensor:
    """Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh').

    Args:
        path: Path of the file to read. It is opened directly in binary mode;
            no decompression is performed here.
        strict: When True, assert that the number of parsed elements equals
            the product of the dimensions announced in the header.

    Returns:
        A tensor with the dtype selected by the header's type code and the
        shape given by the header's dimension sizes.

    Raises:
        AssertionError: If the header's dimension count or type code is out
            of range, or if ``strict`` is set and the element count does not
            match the header.
    """
    # read
    with open(path, "rb") as f:
        data = f.read()

    # parse the header: one magic word followed by `nd` dimension sizes,
    # each 4 bytes wide
    magic = get_int(data[0:4])
    nd = magic % 256
    ty = magic // 256
    assert 1 <= nd <= 3
    assert 8 <= ty <= 14
    torch_type = SN3_PASCALVINCENT_TYPEMAP[ty]
    s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]

    parsed = torch.frombuffer(bytearray(data), dtype=torch_type, offset=(4 * (nd + 1)))

    # element_size() works for every dtype, whereas torch.iinfo() rejects
    # floating point dtypes.
    num_bytes_per_value = parsed.element_size()

    # The MNIST format uses the big endian byte order, while torch.frombuffer()
    # interprets the bytes in the byte order of the host. On little endian hosts
    # the bytes *within each element* therefore have to be reversed; the order
    # of the elements themselves must stay untouched.
    if sys.byteorder == "little" and num_bytes_per_value > 1:
        parsed = (
            parsed.view(torch.uint8)
            .view(-1, num_bytes_per_value)
            .flip(-1)
            .reshape(-1)
            .view(torch_type)
        )

    assert parsed.shape[0] == np.prod(s) or not strict
    return parsed.view(*s)
def _get_video_tensor(video_dir, video_file):
    """Load a video file and expose its raw bytes as a uint8 tensor.

    Returns a ``(full_path, tensor)`` pair, where ``full_path`` is
    ``video_dir`` joined with ``video_file``. Asserts that the file exists.
    """
    full_path = os.path.join(video_dir, video_file)
    assert os.path.exists(full_path), "File not found: %s" % full_path

    with open(full_path, "rb") as stream:
        raw_bytes = stream.read()

    # The tensor is built on the bytes object that was just read.
    return full_path, torch.frombuffer(raw_bytes, dtype=torch.uint8)
def test_encode_jpeg(img_path):
    """Compare ``encode_jpeg`` output with a JPEG written by PIL at quality 75."""
    source = read_image(img_path)

    # Reference encoding: save through PIL into an in-memory buffer.
    reference_buffer = io.BytesIO()
    F.to_pil_image(source).save(reference_buffer, format="JPEG", quality=75)
    expected = torch.frombuffer(reference_buffer.getvalue(), dtype=torch.uint8)

    # Check both the tensor as read and its contiguous version.
    for candidate in (source, source.contiguous()):
        actual = encode_jpeg(candidate, quality=75)
        assert_equal(actual, expected)
def _probe_video_from_memory(
    video_data: torch.Tensor,
) -> VideoMetaData:
    """
    Probe an in-memory video and describe it as a VideoMetaData.

    Input that is not already a tensor is first wrapped into a uint8 tensor
    with ``torch.frombuffer``.
    This function is torchscriptable
    """
    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)

    probed = torch.ops.video_reader.probe_video_from_memory(video_data)
    # The op yields six values: three for the video stream, three for audio.
    video_timebase, video_fps, video_duration, audio_timebase, audio_sample_rate, audio_duration = probed
    return _fill_info(
        video_timebase,
        video_fps,
        video_duration,
        audio_timebase,
        audio_sample_rate,
        audio_duration,
    )
def _run_test(self, shape, dtype, count=-1, first=0, offset=None, **kwargs):
    # Builds a CPU tensor, views its memory through both torch.frombuffer and
    # numpy.frombuffer, and asserts that the two results agree in content and
    # in data address. Returns (source numpy array, torch.frombuffer result).
    np_dtype = common.torch_to_numpy_dtype_dict[dtype]

    # Without an explicit byte offset, derive it from the index of the first element.
    byte_offset = first * get_dtype_size(dtype) if offset is None else offset

    source_array = common.make_tensor(shape, torch.device("cpu"), dtype).numpy()
    source_view = memoryview(source_array)

    # PyTorch goes first so that its errors surface; whenever it succeeds,
    # the NumPy call is expected to succeed as well.
    from_torch = torch.frombuffer(source_view, dtype=dtype, count=count, offset=byte_offset, **kwargs)
    from_numpy = numpy.frombuffer(source_view, dtype=np_dtype, count=count, offset=byte_offset)

    self.assertEqual(from_numpy, from_torch)
    numpy_address = from_numpy.__array_interface__["data"][0]
    self.assertEqual(numpy_address, from_torch.data_ptr())
    return (source_array, from_torch)
def read_image_as_png(
    bytes_obj: Optional[bytes] = None, mode: ImageReadMode = ImageReadMode.UNCHANGED
) -> Optional[torch.Tensor]:
    """Reads image from bytes object from a PNG file.

    Args:
        bytes_obj: Encoded image content. ``None`` and empty input are
            reported as a failed read.
        mode: Read mode forwarded to ``decode_image``.

    Returns:
        The decoded image tensor, or None if reading failed. Failures never
        propagate; they are reported through ``warnings.warn``.
    """
    try:
        # Decode from a bytearray copy rather than from a view on a temporary
        # BytesIO: if decoding fails while such a view is still alive, closing
        # the BytesIO raises BufferError and the warning below would report
        # that instead of the actual decoding error.
        payload = bytearray(bytes_obj) if bytes_obj is not None else bytearray()
        if not payload:
            raise ValueError(
                "Bytes object is empty. This could be due to a failed load from storage."
            )
        return decode_image(torch.frombuffer(payload, dtype=torch.uint8), mode=mode)
    except Exception as e:
        # Deliberately broad: this reader is best-effort and signals any
        # failure with a warning and a None result.
        warnings.warn(
            f"Failed to read image from PNG file. Original exception: {e}")
        return None
def _read_video_timestamps_from_memory(
    video_data: torch.Tensor,
) -> Tuple[List[int], List[int], VideoMetaData]:
    """
    Collect the presentation timestamps (pts) of an in-memory video.

    The reader op is invoked with its pts-only flag set, so only timestamps
    are returned and the actual frame pixel data is not copied. Thus,
    read_video_timestamps(...) is much faster than read_video(...).

    Returns the video pts list, the audio pts list and the stream metadata.
    """
    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)

    decoded = torch.ops.video_reader.read_video_from_memory(
        video_data,
        0,  # seek_frame_margin
        1,  # getPtsOnly
        1,  # read_video_stream
        0,  # video_width
        0,  # video_height
        0,  # video_min_dimension
        0,  # video_max_dimension
        0,  # video_start_pts
        -1,  # video_end_pts
        0,  # video_timebase_num
        1,  # video_timebase_den
        1,  # read_audio_stream
        0,  # audio_samples
        0,  # audio_channels
        0,  # audio_start_pts
        -1,  # audio_end_pts
        0,  # audio_timebase_num
        1,  # audio_timebase_den
    )
    # The frame tensors (first and sixth entries) are not used here.
    (
        _video_frames,
        video_pts,
        video_timebase,
        video_fps,
        video_duration,
        _audio_frames,
        audio_pts,
        audio_timebase,
        audio_sample_rate,
        audio_duration,
    ) = decoded

    info = _fill_info(
        video_timebase, video_fps, video_duration, audio_timebase, audio_sample_rate, audio_duration
    )
    # Convert the timestamp tensors to plain Python lists.
    return video_pts.numpy().tolist(), audio_pts.numpy().tolist(), info
def _read_image_buffer(v):
    """Decode the uploaded file ``v`` into an image tensor.

    Reads the bytes sent via the REST API from ``v.file`` and returns the
    result of ``decode_image`` (described by the original comments as being
    in [channels, height, width] format).
    """
    # A single bytearray copy replaces the former BytesIO round-trip and hands
    # torch.frombuffer a writable buffer, which avoids its
    # "buffer is not writable" warning.
    raw = bytearray(v.file.read())
    image = decode_image(torch.frombuffer(raw, dtype=torch.uint8))
    return image  # channels, height, width
def test_byte_to_int(self):
    # Reinterprets eight int8 values as two int32 values. The 0xFF byte (-1 as
    # a signed byte) has to sit at the least significant position of each
    # 4-byte group, and that position depends on the byte order of the host.
    import sys

    if sys.byteorder == "little":
        raw_bytes = [-1, 0, 0, 0, -1, 0, 0, 0]
    else:
        raw_bytes = [0, 0, 0, -1, 0, 0, 0, -1]

    byte_array = numpy.array(raw_bytes, dtype=numpy.byte)
    tensor = torch.frombuffer(byte_array, dtype=torch.int32)
    self.assertEqual(tensor.numel(), 2)
    self.assertSequenceEqual(tensor, [255, 255])
def test_non_writable_buffer(self, device, dtype):
    # A bytes object is used as a read-only buffer; torch.frombuffer is
    # expected to warn about it.
    read_only_bytes = common.make_tensor((1,), device, dtype).numpy().tobytes()
    expected_warning = r"The given buffer is not writable."
    with self.assertWarnsOnceRegex(UserWarning, expected_warning):
        torch.frombuffer(read_only_bytes, dtype=dtype)
def test_not_a_buffer(self, device, dtype):
    # A plain list is passed where a buffer is required, so torch.frombuffer
    # must refuse it.
    not_a_buffer = [1, 2, 3, 4]
    expected_error = r"object does not implement Python buffer protocol."
    with self.assertRaisesRegex(ValueError, expected_error):
        torch.frombuffer(not_a_buffer, dtype=dtype)
def _read_video_from_memory(
    video_data: torch.Tensor,
    seek_frame_margin: float = 0.25,
    read_video_stream: int = 1,
    video_width: int = 0,
    video_height: int = 0,
    video_min_dimension: int = 0,
    video_max_dimension: int = 0,
    video_pts_range: Tuple[int, int] = (0, -1),
    video_timebase_numerator: int = 0,
    video_timebase_denominator: int = 1,
    read_audio_stream: int = 1,
    audio_samples: int = 0,
    audio_channels: int = 0,
    audio_pts_range: Tuple[int, int] = (0, -1),
    audio_timebase_numerator: int = 0,
    audio_timebase_denominator: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Decode a video held in memory and return its video frames and audio frames.
    This function is torchscriptable.

    Args:
        video_data: compressed video content, given either as a torch.Tensor
            or as python bytes. (Earlier documentation names dtype torch.int8
            for the tensor form; non-tensor input is wrapped here as
            torch.uint8.)
        seek_frame_margin (double, optional): seeking a frame in the stream is
            imprecise, so when video_start_pts is specified the seek targets a
            pts that is earlier by seek_frame_margin seconds.
        read_video_stream (int, optional): 1 to read the video stream, 0 to skip it.
        video_width/video_height/video_min_dimension/video_max_dimension (int):
            together these decide the size of the decoded frames:

            - all four are 0: keep the original frame resolution.
            - only video_min_dimension != 0: keep the aspect ratio and resize
              so that the shorter edge is video_min_dimension.
            - only video_max_dimension != 0: keep the aspect ratio and resize
              so that the longer edge is video_max_dimension.
            - video_min_dimension != 0 and video_max_dimension != 0 (width and
              height 0): resize so that the shorter edge is
              video_min_dimension and the longer edge is video_max_dimension;
              the aspect ratio may not be preserved.
            - only video_height != 0: keep the aspect ratio and resize so that
              the frame height is video_height.
            - only video_width != 0: keep the aspect ratio and resize so that
              the frame width is video_width.
            - video_width != 0 and video_height != 0 (min and max dimension
              0): resize the frame to exactly video_width x video_height.
        video_pts_range (list(int), optional): start and end presentation
            timestamp of the video stream.
        video_timebase_numerator / video_timebase_denominator (float, optional):
            a rational number which denotes the timebase of the video stream.
        read_audio_stream (int, optional): 1 to read the audio stream, 0 to skip it.
        audio_samples (int, optional): audio sampling rate.
        audio_channels (int optional): audio channels.
        audio_pts_range (list(int), optional): start and end presentation
            timestamp of the audio stream.
        audio_timebase_numerator / audio_timebase_denominator (float, optional):
            a rational number which denotes the time base of the audio stream.

    Returns:
        vframes (Tensor[T, H, W, C]): the `T` video frames
        aframes (Tensor[L, K]): the audio frames, where `L` is the number of
            points and `K` is the number of channels
    """
    _validate_pts(video_pts_range)
    _validate_pts(audio_pts_range)

    if not isinstance(video_data, torch.Tensor):
        video_data = torch.frombuffer(video_data, dtype=torch.uint8)

    decoded = torch.ops.video_reader.read_video_from_memory(
        video_data,
        seek_frame_margin,
        0,  # getPtsOnly
        read_video_stream,
        video_width,
        video_height,
        video_min_dimension,
        video_max_dimension,
        video_pts_range[0],
        video_pts_range[1],
        video_timebase_numerator,
        video_timebase_denominator,
        read_audio_stream,
        audio_samples,
        audio_channels,
        audio_pts_range[0],
        audio_pts_range[1],
        audio_timebase_numerator,
        audio_timebase_denominator,
    )

    # Only the frame tensors and the audio timestamps are needed below.
    (
        video_frames,
        _video_pts,
        _video_timebase,
        _video_fps,
        _video_duration,
        audio_frames,
        audio_pts,
        _audio_timebase,
        _audio_sample_rate,
        _audio_duration,
    ) = decoded

    if audio_frames.numel() > 0:
        # when audio stream is found
        audio_frames = _align_audio_frames(audio_frames, audio_pts, audio_pts_range)
    return video_frames, audio_frames