Exemplo n.º 1
0
def sampling_rate(file: str) -> int:
    """Sampling rate of audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected with :func:`soundfile.info`.
    All other files are queried with sox first
    and with the ``mediainfo`` command line tool
    if sox raises an error.

    Args:
        file: file name of input audio file

    Returns:
        sampling rate of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        info = soundfile.info(file)
        return info.samplerate

    try:
        rate = sox.file_info.sample_rate(file)
        return int(rate)
    except sox.core.SoxiError:
        # sox could not handle the file, ask mediainfo instead
        query = f'mediainfo --Inform="Audio;%SamplingRate%" "{file}"'
        reported = run(query)
        if not reported:
            # An empty answer means mediainfo found no sampling rate either
            raise RuntimeError(broken_file_error(file))
        return int(reported)
Exemplo n.º 2
0
def channels(file: str) -> int:
    """Number of channels in audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected with :func:`soundfile.info`.
    All other files are queried with sox first
    and with the ``mediainfo`` command line tool
    if sox raises an error.

    Args:
        file: file name of input audio file

    Returns:
        number of channels in audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)

    if file_extension(file) in SNDFORMATS:
        info = soundfile.info(file)
        return info.channels

    try:
        count = sox.file_info.channels(file)
        return int(count)
    except sox.core.SoxiError:

        def mediainfo_channels(field):
            # Ask mediainfo for a single audio field and parse it as integer
            answer = run(f'mediainfo --Inform="Audio;%{field}%" "{file}"')
            return int(answer)

        # For MP4 the stored and the returned number of channels can differ,
        # so the original channel count is tried first
        try:
            return mediainfo_channels('Channel(s)_Original')
        except ValueError:
            try:
                return mediainfo_channels('Channel(s)')
            except ValueError:
                raise RuntimeError(broken_file_error(file))
Exemplo n.º 3
0
def samples(file: str) -> int:
    """Number of samples in audio file.

    Files whose extension is listed in ``SNDFORMATS``
    are inspected directly with :func:`soundfile.info`.
    All other files are first converted to a temporary WAV file,
    which is then inspected the same way.

    Args:
        file: file name of input audio file

    Returns:
        number of samples in audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    def frame_count(path):
        # Read the integer frame count from the header.
        # Computing ``int(duration * samplerate)`` instead
        # goes through a float division and multiplication
        # whose result can end up marginally below the true count,
        # so that ``int()`` truncates to one sample too few.
        return int(soundfile.info(path).frames)

    file = audeer.safe_path(file)
    if file_extension(file) in SNDFORMATS:
        return frame_count(file)

    # Always convert to WAV for non SNDFORMATS
    with tempfile.TemporaryDirectory(prefix='audiofile') as tmpdir:
        tmpfile = os.path.join(tmpdir, 'tmp.wav')
        convert_to_wav(file, tmpfile)
        return frame_count(tmpfile)
Exemplo n.º 4
0
def bit_depth(file: str) -> typing.Optional[int]:
    r"""Bit depth of audio file.

    The bit depth is looked up
    from the subtype reported by :func:`soundfile.info`
    for WAV and FLAC files.
    For all other files,
    e.g. lossy audio files,
    ``None`` is returned as they have a varying bit depth.

    Args:
        file: file name of input audio file

    Returns:
        bit depth of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    # Bit depth per soundfile subtype, grouped by file extension
    subtype_depths = {
        'wav': {
            'PCM_16': 16,
            'PCM_24': 24,
            'PCM_32': 32,
            'PCM_U8': 8,
            'FLOAT': 32,
            'DOUBLE': 64,
            'ULAW': 8,
            'ALAW': 8,
            'IMA_ADPCM': 4,
            'MS_ADPCM': 4,
            'GSM610': 16,  # not sure if this could be variable?
            'G721_32': 4,  # not sure if correct
        },
        'flac': {
            'PCM_16': 16,
            'PCM_24': 24,
            'PCM_32': 32,
            'PCM_S8': 8,
        },
    }

    file = audeer.safe_path(file)
    extension = file_extension(file)
    if extension not in subtype_depths:
        return None

    subtype = soundfile.info(file).subtype
    return subtype_depths[extension][subtype]
Exemplo n.º 5
0
def duration(file: str, sloppy=False) -> float:
    """Duration in seconds of audio file.

    With ``sloppy=False`` (the default)
    the duration is derived from the number of samples
    and the sampling rate,
    so that the duration in seconds
    matches the one in samples.
    Files that need it, e.g. MP3 files,
    are decoded to WAV first.
    Results might differ between machines
    that have different decoders installed.

    With ``sloppy=True`` the duration
    stored in the header of the audio file is reported.
    This is faster,
    but the result still depends on the installed software
    and might differ between machines.
    If the header provides no duration information
    the function falls back to ``sloppy=False``.

    Args:
        file: file name of input audio file
        sloppy: if ``True`` report duration
            as stored in the header

    Returns:
        duration in seconds of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    def exact_duration():
        # Duration that is consistent with the number of samples
        return samples(file) / sampling_rate(file)

    file = audeer.safe_path(file)
    if file_extension(file) in SNDFORMATS:
        info = soundfile.info(file)
        return info.duration

    if not sloppy:
        return exact_duration()

    try:
        seconds = sox.file_info.duration(file)
        return 0.0 if seconds is None else seconds
    except sox.core.SoxiError:
        query = f'mediainfo --Inform="Audio;%Duration%" "{file}"'
        milliseconds = run(query)
        if milliseconds:
            # Convert to seconds, as mediainfo returns milliseconds
            return float(milliseconds) / 1000

    # Neither sox nor mediainfo reported a duration
    return exact_duration()
Exemplo n.º 6
0
def read(
        file: str,
        duration: typing.Optional[float] = None,
        offset: float = 0,
        always_2d: bool = False,
        dtype: str = 'float32',
        **kwargs,
) -> typing.Tuple[np.ndarray, int]:
    """Read audio file.

    It uses :func:`soundfile.read` for WAV, FLAC, and OGG files.
    All other audio files are
    first converted to WAV by sox or ffmpeg.

    Args:
        file: file name of input audio file
        duration: return only a specified duration in seconds
        offset: start reading at offset in seconds
        always_2d: if ``True`` it always returns a two-dimensional signal
            even for mono sound files
        dtype: data type of returned signal,
            select from
            ``'float64'``,
            ``'float32'``,
            ``'int32'``,
            ``'int16'``
        kwargs: pass on further arguments to :func:`soundfile.read`

    Returns:
        * a two-dimensional array in the form
          ``[channels, samples]``.
          If the sound file has only one channel
          and ``always_2d=False``,
          a one-dimensional array is returned
        * sample rate of the audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)
    if file_extension(file) not in SNDFORMATS:
        # Convert file formats not recognized by soundfile to WAV first.
        #
        # NOTE: this is faster than loading them with librosa directly.
        # In addition, librosa seems to have an issue with the precision of
        # the returned magnitude
        # (https://github.com/librosa/librosa/issues/811).
        #
        # It might be the case that MP3 files will be supported by soundfile in
        # the future as well. For a discussion on MP3 support in the underlying
        # libsndfile see https://github.com/erikd/libsndfile/issues/258.
        #
        # Offset and duration are applied during the conversion,
        # so the temporary file is read completely.
        with tempfile.TemporaryDirectory(prefix='audiofile') as tmpdir:
            tmpfile = os.path.join(tmpdir, 'tmp.wav')
            convert_to_wav(file, tmpfile, offset, duration)
            signal, sample_rate = soundfile.read(
                tmpfile,
                dtype=dtype,
                always_2d=always_2d,
                **kwargs,
            )
    else:
        # soundfile expects start and stop in samples,
        # the sampling rate is only looked up if a conversion is needed
        if duration is not None or offset > 0:
            sample_rate = sampling_rate(file)
        if offset > 0:
            offset = np.ceil(offset * sample_rate)  # samples
        if duration is not None:
            duration = int(np.ceil(duration * sample_rate) + offset)  # samples
        signal, sample_rate = soundfile.read(
            file,
            start=int(offset),
            stop=duration,
            dtype=dtype,
            always_2d=always_2d,
            **kwargs,
        )
    # [samples, channels] => [channels, samples]
    signal = signal.T
    return signal, sample_rate
Exemplo n.º 7
0
def write(
        file: str,
        signal: np.ndarray,
        sampling_rate: int,
        bit_depth: int = 16,
        normalize: bool = False,
        **kwargs,
):
    """Write (normalized) audio files.

    Save audio data provided as an array of shape ``[channels, samples]``
    to a WAV, FLAC, or OGG file.
    ``channels`` can be up to 65535 for WAV,
    255 for OGG,
    and 8 for FLAC.
    For monaural audio the array can be one-dimensional.

    It uses :func:`soundfile.write` to write the audio files.

    Args:
        file: file name of output audio file.
            The format (WAV, FLAC, OGG) will be inferred from the file name
        signal: audio data to write
        sampling_rate: sample rate of the audio data
        bit_depth: bit depth of written file in bit,
            can be 8, 16, 24 for WAV and FLAC files,
            and in addition 32 for WAV files
        normalize: normalize audio data before writing.
            The signal is divided by its maximum absolute value;
            an empty or all-zero signal is written unchanged
        kwargs: pass on further arguments to :func:`soundfile.write`

    Raises:
        RuntimeError: for non-supported bit depth or number of channels

    """
    file = audeer.safe_path(file)
    file_type = file_extension(file)

    # Check for allowed precisions
    if file_type == 'wav':
        depth_mapping = {
            8: 'PCM_U8',
            16: 'PCM_16',
            24: 'PCM_24',
            32: 'PCM_32',
        }
    elif file_type == 'flac':
        depth_mapping = {
            8: 'PCM_S8',
            16: 'PCM_16',
            24: 'PCM_24',
        }
    if file_type in ['wav', 'flac']:
        bit_depths = sorted(depth_mapping)
        if bit_depth not in bit_depths:
            raise RuntimeError(
                f'"bit_depth" has to be one of '
                f'{", ".join([str(b) for b in bit_depths])}.'
            )
        subtype = depth_mapping[bit_depth]
    else:
        # Let soundfile pick the subtype for all other formats
        subtype = None

    # Check if number of channels is allowed for chosen file type
    if signal.ndim > 1:
        channels = np.shape(signal)[0]
    else:
        channels = 1
    if channels > MAX_CHANNELS[file_type]:
        if file_type != 'wav':
            hint = " Consider using 'wav' instead."
        else:
            hint = ''
        raise RuntimeError(
            "The maximum number of allowed channels "
            f"for '{file_type}' is {MAX_CHANNELS[file_type]}.{hint}"
        )

    if normalize:
        # An empty signal has no maximum and a silent signal has a peak of 0.
        # Dividing by the peak would raise an error or fill the signal
        # with NaN in those cases, so the signal is left untouched instead.
        peak = np.max(np.abs(signal)) if np.size(signal) > 0 else 0
        if peak > 0:
            signal = signal / peak
    soundfile.write(file, signal.T, sampling_rate, subtype=subtype, **kwargs)