def load(filepath, out=None, normalization=None, num_frames=-1, offset=0):
    """Loads an audio file from disk into a Tensor

    Args:
        filepath (string): path to audio file
        out (Tensor, optional): an output Tensor to use instead of creating one
        normalization (bool or number, optional): If boolean `True`, then
            output is divided by `1 << 31` (i.e. `2 ** 31`). If boolean
            `False` or `None`, the output is returned unscaled. If `number`,
            then output is divided by that number
        num_frames (int, optional): number of frames to load.  -1 to load
            everything after the offset. Must be >= -1.
        offset (int, optional): number of frames from the start of the file
            to begin data loading. Must be >= 0.

    Returns: tuple(Tensor, int)
        - Tensor: output Tensor of size `[L x C]` where L is the number of
          audio frames, C is the number of channels
        - int: the sample-rate of the audio (as listed in the metadata of
          the file)

    Raises:
        OSError: if `filepath` is not an existing file (e.g. it is missing
            or is a directory)
        ValueError: if `num_frames` is less than -1 or `offset` is negative

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([278756, 2])
        >>> print(sample_rate)
        44100

    """
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # validate the cheap scalar arguments first so that a bad call fails
    # before the output tensor is checked or allocated
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    sample_rate = _torch_sox.read_audio_file(filepath, out, num_frames, offset)

    # normalize if needed
    # NOTE: `bool` is a subclass of `int`, so booleans have to be handled
    # before the numeric branch; otherwise `normalization=False` would be
    # treated as the number 0 and the output would be divided by zero.
    if isinstance(normalization, bool):
        if normalization:
            # 2**31 is the magnitude bound of a signed 32-bit integer;
            # presumably the decoder yields samples on that scale - confirm
            out /= 1 << 31
    elif isinstance(normalization, (float, int)):
        out /= normalization  # normalize with custom value

    return out, sample_rate
def load(filepath, out=None, normalization=None):
    """Loads an audio file from disk into a Tensor

    Args:
        filepath (string): path to audio file
        out (Tensor, optional): an output Tensor to use instead of creating one
        normalization (bool or number, optional): If boolean `True`, then
            output is divided by `1 << 31` (i.e. `2 ** 31`). If boolean
            `False` or `None`, the output is returned unscaled. If `number`,
            then output is divided by that number

    Returns: tuple(Tensor, int)
        - Tensor: output Tensor of size `[L x C]` where L is the number of
          audio frames, C is the number of channels
        - int: the sample-rate of the audio (as listed in the metadata of
          the file)

    Raises:
        OSError: if `filepath` is not an existing file (e.g. it is missing
            or is a directory)

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([278756, 2])
        >>> print(sample_rate)
        44100

    """
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    sample_rate = _torch_sox.read_audio_file(filepath, out)

    # normalize if needed
    # NOTE: `bool` is a subclass of `int`, so booleans have to be handled
    # before the numeric branch; otherwise `normalization=False` would be
    # treated as the number 0 and the output would be divided by zero.
    if isinstance(normalization, bool):
        if normalization:
            # 2**31 is the magnitude bound of a signed 32-bit integer;
            # presumably the decoder yields samples on that scale - confirm
            out /= 1 << 31
    elif isinstance(normalization, (float, int)):
        out /= normalization  # normalize with custom value

    return out, sample_rate
def load(
    filepath,
    out=None,
    normalization=True,
    channels_first=True,
    num_frames=0,
    offset=0,
    signalinfo=None,
    encodinginfo=None,
    filetype=None,
):
    r"""See torchaudio.load

    Raises:
        OSError: if ``filepath`` is not an existing file (e.g. it is missing
            or is a directory)
        ValueError: if ``num_frames`` is less than -1 or ``offset`` is
            negative
    """
    # stringify if `pathlib.Path` (noop if already `str`)
    filepath = str(filepath)

    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # validate the cheap scalar arguments first so that a bad call fails
    # before the output tensor is checked or allocated
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    # initialize output tensor
    if out is not None:
        torchaudio.check_input(out)
    else:
        out = torch.FloatTensor()

    # imported here rather than at module level; presumably so the sox
    # extension is only required when this backend is actually used - confirm
    import _torch_sox
    sample_rate = _torch_sox.read_audio_file(
        filepath,
        out,
        channels_first,
        num_frames,
        offset,
        signalinfo,
        encodinginfo,
        filetype,
    )

    # normalize if needed
    torchaudio._audio_normalization(out, normalization)

    return out, sample_rate
def load(filepath,
         out=None,
         normalization=True,
         channels_first=True,
         num_frames=0,
         offset=0,
         signalinfo=None,
         encodinginfo=None,
         filetype=None):
    r"""Loads an audio file from disk into a tensor

    Args:
        filepath (str or pathlib.Path): Path to audio file
        out (torch.Tensor, optional): An output tensor to use instead of creating one. (Default: ``None``)
        normalization (bool, number, or callable, optional): If boolean `True`, then output is divided by `1 << 31`
            (assumes signed 32-bit audio), and normalizes to `[-1, 1]`.
            If `number`, then output is divided by that number
            If `callable`, then the output is passed as a parameter
            to the given function, then the output is divided by
            the result. (Default: ``True``)
        channels_first (bool): Set channels first or length first in result. (Default: ``True``)
        num_frames (int, optional): Number of frames to load.  0 to load everything after the offset.
            Must be >= -1. (Default: ``0``)
        offset (int, optional): Number of frames from the start of the file to begin data loading.
            Must be >= 0. (Default: ``0``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined. (Default: ``None``)
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined. (Default: ``None``)
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. (Default: ``None``)

    Returns:
        Tuple[torch.Tensor, int]: An output tensor of size `[C x L]` or `[L x C]` where L is the number
        of audio frames and C is the number of channels. An integer which is the sample rate of the
        audio (as listed in the metadata of the file)

    Raises:
        OSError: If ``filepath`` is not an existing file (e.g. it is missing or is a directory)
        ValueError: If ``num_frames`` is less than -1 or ``offset`` is negative

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([2, 278756])
        >>> print(sample_rate)
        44100
        >>> data_vol_normalized, _ = torchaudio.load('foo.mp3', normalization=lambda x: torch.abs(x).max())
        >>> print(data_vol_normalized.abs().max())
        1.

    """
    # stringify if `pathlib.Path` (noop if already `str`)
    filepath = str(filepath)

    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # validate the cheap scalar arguments first so that a bad call fails
    # before the output tensor is checked or allocated
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    sample_rate = _torch_sox.read_audio_file(
        filepath,
        out,
        channels_first,
        num_frames,
        offset,
        signalinfo,
        encodinginfo,
        filetype,
    )

    # normalize if needed
    _audio_normalization(out, normalization)

    return out, sample_rate