Exemplo n.º 1
0
def load(filepath, out=None, normalization=None, num_frames=-1, offset=0):
    """Loads an audio file from disk into a Tensor

    Args:
        filepath (string): path to audio file
        out (Tensor, optional): an output Tensor to use instead of creating one
        normalization (bool or number, optional): If boolean `True`, then output is divided by `1 << 31`
                                                  (the full scale of signed 32-bit samples, giving values
                                                  in `[-1, 1]` when the decoded data uses that scale).
                                                  If boolean `False` or `None`, the output is left untouched.
                                                  If `number`, then output is divided by that number
        num_frames (int, optional): number of frames to load.  -1 to load everything after the offset.
        offset (int, optional): number of frames from the start of the file to begin data loading.

    Returns: tuple(Tensor, int)
       - Tensor: output Tensor of size `[L x C]` where L is the number of audio frames, C is the number of channels
       - int: the sample-rate of the audio (as listed in the metadata of the file)

    Raises:
        OSError: if `filepath` does not exist or is not a regular file
        ValueError: if `num_frames` is smaller than -1 or `offset` is negative

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([278756, 2])
        >>> print(sample_rate)
        44100

    """
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    # validate the requested window before handing it to the native reader
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")
    sample_rate = _torch_sox.read_audio_file(filepath, out, num_frames, offset)

    # normalize if needed
    # `bool` is a subclass of `int`, so booleans must be dispatched before the
    # numeric branch; otherwise `normalization=False` would divide by zero.
    if isinstance(normalization, bool):
        if normalization:
            out /= 1 << 31  # full scale of signed 32-bit samples
    elif isinstance(normalization, (float, int)):
        out /= normalization  # normalize with custom value

    return out, sample_rate
Exemplo n.º 2
0
def load(filepath, out=None, normalization=None):
    """Loads an audio file from disk into a Tensor

    Args:
        filepath (string): path to audio file
        out (Tensor, optional): an output Tensor to use instead of creating one
        normalization (bool or number, optional): If boolean `True`, then output is divided by `1 << 31`
                                                  (the full scale of signed 32-bit samples, giving values
                                                  in `[-1, 1]` when the decoded data uses that scale).
                                                  If boolean `False` or `None`, the output is left untouched.
                                                  If `number`, then output is divided by that number

    Returns: tuple(Tensor, int)
       - Tensor: output Tensor of size `[L x C]` where L is the number of audio frames, C is the number of channels
       - int: the sample-rate of the audio (as listed in the metadata of the file)

    Raises:
        OSError: if `filepath` does not exist or is not a regular file

    Example::

        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([278756, 2])
        >>> print(sample_rate)
        44100

    """
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    sample_rate = _torch_sox.read_audio_file(filepath, out)

    # normalize if needed
    # `bool` is a subclass of `int`, so booleans must be dispatched before the
    # numeric branch; otherwise `normalization=False` would divide by zero.
    if isinstance(normalization, bool):
        if normalization:
            out /= 1 << 31  # full scale of signed 32-bit samples
    elif isinstance(normalization, (float, int)):
        out /= normalization  # normalize with custom value

    return out, sample_rate
Exemplo n.º 3
0
def load(
    filepath,
    out=None,
    normalization=True,
    channels_first=True,
    num_frames=0,
    offset=0,
    signalinfo=None,
    encodinginfo=None,
    filetype=None,
):
    r"""Load an audio file through the ``_torch_sox`` extension.

    See torchaudio.load for the full description of every parameter; this
    function validates its arguments and forwards them to
    ``_torch_sox.read_audio_file``.

    Args:
        filepath (str or pathlib.Path): Path to the audio file; converted with ``str()``.
        out (torch.Tensor, optional): Tensor that receives the data. It is passed to
            ``torchaudio.check_input``; when ``None`` an empty ``torch.FloatTensor`` is created.
        normalization: Forwarded unchanged to ``torchaudio._audio_normalization``.
        channels_first, signalinfo, encodinginfo, filetype: Forwarded unchanged to
            ``_torch_sox.read_audio_file``.
        num_frames (int): Number of frames to load; must be ``>= -1``.
        offset (int): Number of frames to skip from the start; must be ``>= 0``.

    Returns:
        tuple: ``(out, sample_rate)`` where ``sample_rate`` is the value returned by
        ``_torch_sox.read_audio_file``.

    Raises:
        OSError: If ``filepath`` does not exist or is not a regular file.
        ValueError: If ``num_frames`` is smaller than -1 or ``offset`` is negative.
    """

    # stringify if `pathlib.Path` (noop if already `str`)
    filepath = str(filepath)
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        torchaudio.check_input(out)
    else:
        out = torch.FloatTensor()

    # validate the requested window before handing it to the native reader
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    # imported lazily so the extension is only required when this function runs
    import _torch_sox
    sample_rate = _torch_sox.read_audio_file(filepath, out, channels_first,
                                             num_frames, offset, signalinfo,
                                             encodinginfo, filetype)

    # normalize if needed
    torchaudio._audio_normalization(out, normalization)

    return out, sample_rate
Exemplo n.º 4
0
def load(filepath,
         out=None,
         normalization=True,
         channels_first=True,
         num_frames=0,
         offset=0,
         signalinfo=None,
         encodinginfo=None,
         filetype=None):
    r"""Loads an audio file from disk into a tensor

    Args:
        filepath (str or pathlib.Path): Path to audio file
        out (torch.Tensor, optional): An output tensor to use instead of creating one. (Default: ``None``)
        normalization (bool, number, or callable, optional): If boolean `True`, then output is divided by `1 << 31`
            (assumes signed 32-bit audio), and normalizes to `[-1, 1]`.
            If `number`, then output is divided by that number
            If `callable`, then the output is passed as a parameter
            to the given function, then the output is divided by
            the result. (Default: ``True``)
        channels_first (bool): Set channels first or length first in result. (Default: ``True``)
        num_frames (int, optional): Number of frames to load.  0 to load everything after the offset.
            (Default: ``0``)
        offset (int, optional): Number of frames from the start of the file to begin data loading.
            (Default: ``0``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined. (Default: ``None``)
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined. (Default: ``None``)
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. (Default: ``None``)

    Returns:
        Tuple[torch.Tensor, int]: An output tensor of size `[C x L]` or `[L x C]` where L is the number
        of audio frames and C is the number of channels. An integer which is the sample rate of the
        audio (as listed in the metadata of the file)

    Raises:
        OSError: If ``filepath`` does not exist or is not a regular file.
        ValueError: If ``num_frames`` is smaller than -1 or ``offset`` is negative.

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> print(data.size())
        torch.Size([2, 278756])
        >>> print(sample_rate)
        44100
        >>> data_vol_normalized, _ = torchaudio.load('foo.mp3', normalization=lambda x: torch.abs(x).max())
        >>> print(data_vol_normalized.abs().max())
        1.

    """
    # stringify if `pathlib.Path` (noop if already `str`)
    filepath = str(filepath)
    # check if valid file
    if not os.path.isfile(filepath):
        raise OSError("{} not found or is a directory".format(filepath))

    # initialize output tensor
    if out is not None:
        check_input(out)
    else:
        out = torch.FloatTensor()

    # validate the requested window before handing it to the native reader
    # NOTE(review): -1 is still accepted here although the documented
    # "load everything" sentinel is 0 -- confirm how the backend treats -1.
    if num_frames < -1:
        raise ValueError(
            "Expected value for num_frames -1 (entire file) or >=0")
    if offset < 0:
        raise ValueError("Expected non-negative offset value")

    sample_rate = _torch_sox.read_audio_file(filepath, out, channels_first,
                                             num_frames, offset, signalinfo,
                                             encodinginfo, filetype)

    # normalize if needed
    _audio_normalization(out, normalization)

    return out, sample_rate