Example No. 1
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.
    Can be any format supported by the underlying library (libsndfile or SciPy)
    """
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        signal = sf.read()
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = len(sf)
        file_format = sf.format_info + ' ' + sf.subtype_info
        sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        signal = sf.read_frames(sf.nframes)
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = sf.nframes
        file_format = sf.format
        sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            channels = 1
        samples = signal.shape[0]
        file_format = str(signal.dtype)

    return signal, sample_rate, channels
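
A minimal usage sketch for load() above. It assumes the module-level wav_loader flag is set to 'pysoundfile', that SoundFile is imported from the soundfile package, and that example.wav is a hypothetical input file:

signal, sample_rate, channels = load('example.wav')  # hypothetical file name
print('rate:', sample_rate, 'channels:', channels, 'frames:', signal.shape[0])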
Example No. 2
def readWave(audio_path,
             start_frame,
             end_frame,
             mono=True,
             sample_rate=None,
             clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    inf = snd_file._info
    audio_sr = inf.samplerate

    snd_file.seek(start_frame)
    audio = snd_file.read(end_frame - start_frame, dtype='float32')
    snd_file.close()
    audio = audio.T  # Transpose to (channels, frames)

    # Convert to mono if desired
    if mono and len(audio.shape) > 1 and audio.shape[0] > 1:
        audio = np.mean(audio, axis=0)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = librosa.resample(audio,
                                 audio_sr,
                                 sample_rate,
                                 res_type="kaiser_fast")
        audio_sr = sample_rate

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr
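
A usage sketch for readWave() above; the frame range and target rate are illustrative, and the function relies on numpy, librosa and SoundFile being imported in its module:

# Read the first 44100 frames (one second at 44.1 kHz) as mono, resampled to 22.05 kHz
audio, sr = readWave('example.wav', start_frame=0, end_frame=44100,
                     mono=True, sample_rate=22050)
print(audio.shape, sr)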
Example No. 3
def load_dict(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format supported by the underlying library (libsndfile or SciPy)
    """
    soundfile = {}
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        soundfile['signal'] = sf.read()
        soundfile['channels'] = sf.channels
        soundfile['fs'] = sf.samplerate
        soundfile['samples'] = len(sf)
        soundfile['format'] = sf.format_info + ' ' + sf.subtype_info
        sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        soundfile['signal'] = sf.read_frames(sf.nframes)
        soundfile['channels'] = sf.channels
        soundfile['fs'] = sf.samplerate
        soundfile['samples'] = sf.nframes
        soundfile['format'] = sf.format
        sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        soundfile['fs'], soundfile['signal'] = read(filename)
        try:
            soundfile['channels'] = soundfile['signal'].shape[1]
        except IndexError:
            soundfile['channels'] = 1
        soundfile['samples'] = soundfile['signal'].shape[0]
        soundfile['format'] = str(soundfile['signal'].dtype)

    return soundfile
Example No. 4
def load(filename):
    """
    Load a wave file and return the signal, sample rate and number of channels.

    Can be any format supported by the underlying library (libsndfile or SciPy)
    """
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        signal = sf.read()
        channels = sf.channels
        sample_rate = sf.samplerate
        sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        signal = sf.read_frames(sf.nframes)
        channels = sf.channels
        sample_rate = sf.samplerate
        sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            channels = 1

    return signal, sample_rate, channels
Example No. 5
def read_signal(filename, offset=0, nsamples=-1, nchannels=0, offset_is_samples=False):
    """Read a wavefile and return as numpy array of floats.

    Args:
        filename (string): Name of file to read
        offset (int, optional): Offset in samples or seconds (from start). Defaults to 0.
        nsamples (int, optional): Number of samples to read (default: -1 = read to the end of the file)
        nchannels (int, optional): Expected number of channels (default: 0 = any number OK)
        offset_is_samples (bool): If True, offset is measured in samples rather than seconds (default: False)
    Returns:
        ndarray: audio signal
    """
    try:
        wave_file = SoundFile(filename)
    except Exception as e:
        # Re-raise with a clear message instead of libsndfile's misleading (e.g. 24-bit) error
        raise Exception(f"Unable to read {filename}.") from e

    if nchannels != 0 and wave_file.channels != nchannels:
        raise Exception(
            f"Wav file ({filename}) was expected to have {nchannels} channels."
        )

    if wave_file.samplerate != CONFIG.fs:
        raise Exception(f"Sampling rate is not {CONFIG.fs} for filename {filename}.")

    if not offset_is_samples:  # Default behaviour
        offset = int(offset * wave_file.samplerate)

    if offset != 0:
        wave_file.seek(offset)

    x = wave_file.read(frames=nsamples)

    return x
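
A usage sketch for read_signal() above. It assumes CONFIG.fs matches the file's sampling rate (44100 below is only illustrative) and that example.wav is a hypothetical input file:

# Read 2 seconds starting 0.5 s into the file; offset is in seconds by default
x = read_signal('example.wav', offset=0.5, nsamples=2 * 44100)
print(x.shape)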
Example No. 6
def readWave(audio_path,
             start_frame,
             end_frame,
             mono=True,
             sample_rate=None,
             clip=True):
    snd_file = SoundFile(audio_path, mode='r')
    inf = snd_file._info
    audio_sr = inf.samplerate

    start_read = max(start_frame, 0)
    pad_front = -min(start_frame, 0)
    end_read = min(end_frame, inf.frames)
    pad_back = max(end_frame - inf.frames, 0)

    snd_file.seek(start_read)
    audio = snd_file.read(end_read - start_read,
                          dtype='float32',
                          always_2d=True)  # (num_frames, channels)
    snd_file.close()

    # Pad if necessary (start_frame or end_frame out of bounds)
    audio = np.pad(audio, [(pad_front, pad_back), (0, 0)],
                   mode="constant",
                   constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        res_length = int(
            np.ceil(
                float(audio.shape[0]) * float(sample_rate) / float(audio_sr)))
        audio = np.pad(audio, [(1, 1), (0, 0)],
                       mode="reflect")  # Pad audio first
        audio = librosa.resample(audio.T,
                                 audio_sr,
                                 sample_rate,
                                 res_type="kaiser_fast").T
        skip = (audio.shape[0] - res_length) // 2
        audio = audio[skip:skip + res_length, :]

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    return audio, audio_sr
Example No. 7
def read_audio_segment(file, pos, length):
    myfile = SoundFile(file)
    myfile.seek(pos)
    audio = myfile.read(length)
    myfile.close()
    return audio
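
A usage sketch for read_audio_segment() above, with an illustrative position and length:

# Read 4096 frames starting at frame 10000
segment = read_audio_segment('example.wav', pos=10000, length=4096)
print(segment.shape)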
Example No. 8
def analyze(filename):
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        signal = sf.read()
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = len(sf)
        file_format = sf.format_info + ' ' + sf.subtype_info
        sf.close()
    elif wav_loader == 'audiolab':
        sf = Sndfile(filename, 'r')
        signal = sf.read_frames(sf.nframes)
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = sf.nframes
        file_format = sf.format
        sf.close()
    elif wav_loader == 'scipy':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            channels = 1
        samples = signal.shape[0]
        file_format = str(signal.dtype)

        # Scale common formats
        # Other bit depths (24, 20) are not handled by SciPy correctly.
        if file_format == 'int16':
            signal = signal.astype(float) / (2**15)
        elif file_format == 'uint8':
            signal = (signal.astype(float) - 128) / (2**7)
        elif file_format == 'int32':
            signal = signal.astype(float) / (2**31)
        elif file_format == 'float32':
            pass
        else:
            raise Exception("Don't know how to handle file "
                            "format {}".format(file_format))

    else:
        raise Exception("wav_loader has failed")

    header = 'dBFS values are relative to a full-scale square wave'

    if samples / sample_rate >= 1:
        length = str(samples / sample_rate) + ' seconds'
    else:
        length = str(samples / sample_rate * 1000) + ' milliseconds'

    results = [
        'Properties for "' + filename + '"',
        str(file_format),
        'Channels:\t%d' % channels,
        'Sampling rate:\t%d Hz' % sample_rate,
        'Samples:\t%d' % samples,
        'Length: \t' + length,
        '-----------------',
    ]

    if channels == 1:
        # Monaural
        results += properties(signal, sample_rate)
    elif channels == 2:
        # Stereo
        if array_equal(signal[:, 0], signal[:, 1]):
            results += ['Left and Right channels are identical:']
            results += properties(signal[:, 0], sample_rate)
        else:
            results += ['Left channel:']
            results += properties(signal[:, 0], sample_rate)
            results += ['Right channel:']
            results += properties(signal[:, 1], sample_rate)
    else:
        # Multi-channel
        for ch_no, channel in enumerate(signal.transpose()):
            results += ['Channel %d:' % (ch_no + 1)]
            results += properties(channel, sample_rate)

    display(header, results)

    plot_histogram = False
    if plot_histogram:
        histogram(signal)
Example No. 9
def readAudio(audio_path,
              offset=0.0,
              duration=None,
              mono=True,
              sample_rate=None,
              clip=True,
              padding_duration=0.0,
              metadata=None):
    '''
    Reads an audio file wholly or partly, and optionally converts it to mono and changes sampling rate.
    By default, it loads the whole audio file. If offset is None, duration must not be None;
    the offset is then chosen randomly so that a random section of the desired duration is selected.
    Optionally, the file can be zero-padded by a number of seconds at the start and end before this random section is selected.

    :param audio_path: Path to audio file
    :param offset: Position in audio file (s) where to start reading. If None, duration has to be not None, and position will be randomly determined.
    :param duration: How many seconds of audio to read
    :param mono: Convert to mono after reading
    :param sample_rate: Convert to given sampling rate if given
    :param padding_duration: Amount of padding (s) on each side that needs to be filled up with silence if it isn't available
    :param metadata: metadata about audio file, accelerates reading audio since duration does not need to be determined from file 
    :return: Audio signal, Audio sample rate
    '''

    if os.path.splitext(audio_path)[1][1:].lower() == "mp3":
        # If it's an MP3, call ffmpeg with offset and duration parameters
        # Get mp3 metadata information and duration
        if metadata is None:
            audio_sr, audio_channels, audio_duration = Metadata.get_mp3_metadata(
                audio_path)
        else:
            audio_sr = metadata[0]
            audio_channels = metadata[1]
            audio_duration = metadata[2]
        print(audio_duration)

        pad_front_duration = 0.0
        pad_back_duration = 0.0

        if offset is None:  # In this case, select random section of audio file
            assert (duration is not None)
            max_start_pos = audio_duration + 2 * padding_duration - duration
            if (
                    max_start_pos <= 0.0
            ):  # If audio file is shorter than the desired section, take all of it; it will be padded later
                print("WARNING: Audio file " + audio_path + " has length " +
                      str(audio_duration) +
                      " but is expected to be at least " + str(duration))
                return librosa.load(
                    audio_path, sample_rate, mono,
                    res_type='kaiser_fast')  # Return whole audio file
            start_pos = np.random.uniform(
                0.0, max_start_pos
            )  # Otherwise randomly determine audio section, taking padding on both sides into account
            offset = max(start_pos - padding_duration,
                         0.0)  # Read from this position in audio file
            pad_front_duration = max(padding_duration - start_pos, 0.0)
        assert (offset is not None)

        if duration is not None:  # Adjust duration if it overlaps with end of track
            pad_back_duration = max(offset + duration - audio_duration, 0.0)
            duration = duration - pad_front_duration - pad_back_duration  # Subtract padding from the amount we have to read from file
        else:  # None duration: Read from offset to end of file
            duration = audio_duration - offset

        pad_front_frames = int(pad_front_duration * float(audio_sr))
        pad_back_frames = int(pad_back_duration * float(audio_sr))

        args = [
            'ffmpeg', '-noaccurate_seek', '-ss',
            str(offset), '-t',
            str(duration), '-i', audio_path, '-f', 's16le', '-'
        ]

        audio = []
        process = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=open(os.devnull, 'wb'))
        num_reads = 0
        while True:
            output = process.stdout.read(4096)
            if not output and process.poll() is not None:  # stdout.read() returns bytes; test emptiness directly
                break
            if output:
                audio.append(
                    librosa.util.buf_to_float(output, dtype=np.float32))
                num_reads += 1

        audio = np.concatenate(audio)
        if audio_channels > 1:
            audio = audio.reshape((-1, audio_channels)).T

    else:  #Not an MP3: Handle with PySoundFile
        # open audio file
        snd_file = SoundFile(audio_path, mode='r')
        inf = snd_file._info
        audio_sr = inf.samplerate

        if duration is not None:
            num_frames = int(duration * float(audio_sr))
        pad_frames = int(padding_duration * float(audio_sr))
        pad_front_frames = 0
        pad_back_frames = 0

        if offset is None:  # In this case, select random section of audio file
            assert (duration is not None)
            max_start_pos = inf.frames + 2 * pad_frames - num_frames
            if (
                    max_start_pos <= 0
            ):  # If audio file is shorter than the desired section, take all of it; it will be padded later
                print("WARNING: Audio file " + audio_path + " has frames  " +
                      str(inf.frames) + " but is expected to be at least " +
                      str(num_frames))
                return librosa.load(
                    audio_path, sample_rate, mono,
                    res_type='kaiser_fast')  # Return whole audio file
            start_pos = np.random.randint(
                0, max_start_pos
            )  # Otherwise randomly determine audio section, taking padding on both sides into account
            start_frame = max(start_pos - pad_frames,
                              0)  # Read from this position in audio file
            pad_front_frames = max(pad_frames - start_pos, 0)
        else:
            start_frame = int(offset * float(audio_sr))

        if duration is not None:  # Adjust duration if it overlaps with end of track
            pad_back_frames = max(start_frame + num_frames - inf.frames, 0)
            num_frames = num_frames - pad_front_frames - pad_back_frames
        else:  # Duration is None => Read from start frame to end of track
            num_frames = inf.frames - start_frame

        snd_file.seek(start_frame)
        audio = snd_file.read(num_frames, dtype='float32')
        snd_file.close()
        audio = audio.T  # Transpose to (channels, frames)

        centre_start_frame = start_frame - pad_front_frames + pad_frames
        centre_end_frame = start_frame + num_frames + pad_back_frames - pad_frames

    # AT THIS POINT WE HAVE A [N_CHANNELS, N_SAMPLES] NUMPY ARRAY FOR THE AUDIO
    # Pad as indicated at beginning and end
    if len(audio.shape) > 1:
        audio = np.pad(audio, [(0, 0), (pad_front_frames, pad_back_frames)],
                       mode="constant",
                       constant_values=0.0)
    else:
        audio = np.pad(audio, [(pad_front_frames, pad_back_frames)],
                       mode="constant",
                       constant_values=0.0)

    # Convert to mono if desired
    if mono and len(audio.shape) > 1 and audio.shape[0] > 1:
        audio = np.mean(audio, axis=0)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = librosa.resample(audio,
                                 audio_sr,
                                 sample_rate,
                                 res_type="kaiser_fast")
        audio_sr = sample_rate

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    if float(audio.shape[0]) / float(audio_sr) < 1.0:  # audio_sr holds the output rate at this point
        print("----------------------ERROR------------------")

    if os.path.splitext(audio_path)[1][1:].lower() == "mp3":
        return audio, audio_sr
    else:
        return audio, audio_sr, centre_start_frame, centre_end_frame
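
A usage sketch for readAudio() above, assuming a WAV input (so the PySoundFile branch is taken); the offset, duration and target rate are illustrative:

# Read 2 seconds starting at 5.0 s, downmixed to mono and resampled to 22.05 kHz
audio, sr, centre_start, centre_end = readAudio('example.wav', offset=5.0,
                                                duration=2.0, mono=True,
                                                sample_rate=22050)
print(audio.shape, sr, centre_start, centre_end)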
Example No. 10
def readAudio(audio_path,
              offset=0.0,
              duration=None,
              mono=True,
              sample_rate=None,
              clip=True,
              pad_frames=0,
              metadata=None):
    '''
    Reads an audio file wholly or partly, and optionally converts it to mono and changes sampling rate.
    By default, it loads the whole audio file. If offset is None, duration must not be None;
    the offset is then chosen randomly so that a random section of the desired duration is selected.
    Optionally, the file can be zero-padded by a number of seconds at the start and end before this random section is selected.

    :param audio_path: Path to audio file
    :param offset: Position in audio file (s) where to start reading. If None, duration has to be not None, and position will be randomly determined.
    :param duration: How many seconds of audio to read
    :param mono: Convert to mono after reading
    :param sample_rate: Convert to given sampling rate if given
    :param pad_frames: maximum number of frames with which to pad the audio if the samples at the borders are not available
    :param metadata: metadata about audio file, accelerates reading audio since duration does not need to be determined from file 
    :return: Audio signal, Audio sample rate
    '''

    if os.path.splitext(audio_path)[1][1:].lower() == "mp3":
        # If it's an MP3, call ffmpeg with offset and duration parameters
        # Get mp3 metadata information and duration
        if metadata is None:
            audio_sr, audio_channels, audio_duration = Metadata.get_mp3_metadata(
                audio_path)
        else:
            audio_sr = metadata[0]
            audio_channels = metadata[1]
            audio_duration = metadata[2]
        print(audio_duration)

        pad_front_duration = 0.0
        pad_back_duration = 0.0

        ref_sr = sample_rate if sample_rate is not None else audio_sr
        padding_duration = float(pad_frames) / float(ref_sr)

        if offset is None:  # In this case, select random section of audio file
            assert (duration is not None)
            max_start_pos = audio_duration + 2 * padding_duration - duration
            if (
                    max_start_pos <= 0.0
            ):  # If audio file is shorter than the desired section, take all of it; it will be padded later
                print("WARNING: Audio file " + audio_path + " has length " +
                      str(audio_duration) +
                      " but is expected to be at least " + str(duration))
                return Utils.load(audio_path, sample_rate,
                                  mono)  # Return whole audio file
            start_pos = np.random.uniform(
                0.0, max_start_pos
            )  # Otherwise randomly determine audio section, taking padding on both sides into account
            offset = max(start_pos - padding_duration,
                         0.0)  # Read from this position in audio file
            pad_front_duration = max(padding_duration - start_pos, 0.0)
        assert (offset is not None)

        if duration is not None:  # Adjust duration if it overlaps with end of track
            pad_back_duration = max(offset + duration - audio_duration, 0.0)
            duration = duration - pad_front_duration - pad_back_duration  # Subtract padding from the amount we have to read from file
        else:  # None duration: Read from offset to end of file
            duration = audio_duration - offset

        pad_front_frames = int(pad_front_duration * float(audio_sr))
        pad_back_frames = int(pad_back_duration * float(audio_sr))

        args = [
            'ffmpeg', '-noaccurate_seek', '-ss',
            str(offset), '-t',
            str(duration), '-i', audio_path, '-f', 's16le', '-'
        ]

        audio = []
        process = subprocess.Popen(args,
                                   stdout=subprocess.PIPE,
                                   stderr=open(os.devnull, 'wb'))
        num_reads = 0
        while True:
            output = process.stdout.read(4096)
            if not output and process.poll() is not None:  # stdout.read() returns bytes; test emptiness directly
                break
            if output:
                audio.append(
                    librosa.util.buf_to_float(output, dtype=np.float32))
                num_reads += 1

        audio = np.concatenate(audio)
        if audio_channels > 1:
            audio = audio.reshape((-1, audio_channels)).T

    else:  #Not an MP3: Handle with PySoundFile
        # open audio file
        snd_file = SoundFile(audio_path, mode='r')
        inf = snd_file._info
        audio_sr = inf.samplerate

        pad_orig_frames = pad_frames if sample_rate is None else int(
            np.ceil(
                float(pad_frames) * (float(audio_sr) / float(sample_rate))))

        pad_front_frames = 0
        pad_back_frames = 0

        if offset is not None and duration is not None:
            start_frame = int(offset * float(audio_sr))
            read_frames = int(duration * float(audio_sr))
        elif offset is not None and duration is None:
            start_frame = int(offset * float(audio_sr))
            read_frames = inf.frames - start_frame
        else:  # In this case, select random section of audio file
            assert (offset is None)
            assert (duration is not None)
            num_frames = int(duration * float(audio_sr))
            max_start_pos = inf.frames - num_frames  # Maximum start position when ignoring padding on both ends of the file
            if (
                    max_start_pos <= 0
            ):  # If audio file is shorter than the desired section, we cannot read the required amount
                print("WARNING: Audio file " + audio_path + " has frames  " +
                      str(inf.frames) + " but is expected to be at least " +
                      str(num_frames))
                raise Exception(
                    "Could not read minimum required amount of audio data")
                #return Utils.load(audio_path, sample_rate, mono)  # Return whole audio file
            start_pos = np.random.randint(
                0, max_start_pos
            )  # Otherwise randomly determine audio section, taking padding on both sides into account

            # Translate source position into mixture input positions (take into account padding)
            start_mix_pos = start_pos - pad_orig_frames
            num_mix_frames = num_frames + 2 * pad_orig_frames
            end_mix_pos = start_mix_pos + num_mix_frames

            # Now see how much of the mixture is available, pad the rest with zeros

            start_frame = max(start_mix_pos, 0)
            end_frame = min(end_mix_pos, inf.frames)
            read_frames = end_frame - start_frame
            pad_front_frames = -min(start_mix_pos, 0)
            pad_back_frames = max(end_mix_pos - inf.frames, 0)

        assert (num_frames > 0)
        snd_file.seek(start_frame)
        audio = snd_file.read(read_frames, dtype='float32', always_2d=True)
        snd_file.close()

        centre_start_frame = start_pos
        centre_end_frame = start_pos + num_frames

    # Pad as indicated at beginning and end
    audio = np.pad(audio, [(pad_front_frames, pad_back_frames), (0, 0)],
                   mode="constant",
                   constant_values=0.0)

    # Convert to mono if desired
    if mono:
        audio = np.mean(audio, axis=1, keepdims=True)

    # Resample if needed
    if sample_rate is not None and sample_rate != audio_sr:
        audio = Utils.resample(audio, audio_sr, sample_rate)

    # Clip to [-1,1] if desired
    if clip:
        audio = np.minimum(np.maximum(audio, -1.0), 1.0)

    if float(audio.shape[0]) / float(sample_rate) < 1.0:
        raise IOError("Error while reading " + audio_path +
                      " - ended up with audio shorter than one second!")

    if os.path.splitext(audio_path)[1][1:].lower() == "mp3":
        return audio, audio_sr, offset, offset + duration
    else:
        return audio, audio_sr, centre_start_frame, centre_end_frame, start_mix_pos, end_mix_pos
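
A similar sketch for this variant, which pads by a frame count (pad_frames) and, for non-MP3 input, additionally returns the mixture read range. The WAV branch only defines all of its return values when offset is None, so a random section is requested here:

audio, sr, c_start, c_end, mix_start, mix_end = readAudio(
    'example.wav', offset=None, duration=2.0, mono=True,
    sample_rate=22050, pad_frames=256)
print(audio.shape, sr, c_start, c_end, mix_start, mix_end)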
Example No. 11
def analyze(filename):
    if wav_loader == 'pysoundfile':
        sf = SoundFile(filename)
        signal = sf.read()
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = len(sf)
        file_format = sf.format_info + ' ' + sf.subtype_info
        sf.close()
    elif wav_loader == 'scikits.audiolab':
        sf = Sndfile(filename, 'r')
        signal = sf.read_frames(sf.nframes)
        channels = sf.channels
        sample_rate = sf.samplerate
        samples = sf.nframes
        file_format = sf.format
        sf.close()
    elif wav_loader == 'scipy.io.wavfile':
        sample_rate, signal = read(filename)
        try:
            channels = signal.shape[1]
        except IndexError:
            channels = 1
        samples = signal.shape[0]
        file_format = str(signal.dtype)

        # Scale common formats
        # Other bit depths (24, 20) are not handled by SciPy correctly.
        if file_format == 'int16':
            signal = signal.astype(float) / (2**15)
        elif file_format == 'uint8':
            signal = (signal.astype(float) - 128) / (2**7)
        elif file_format == 'int32':
            signal = signal.astype(float) / (2**31)
        elif file_format == 'float32':
            pass
        else:
            raise Exception("Don't know how to handle file "
                            "format {}".format(file_format))

    else:
        raise Exception("wav_loader has failed")

    header = 'dBFS values are relative to a full-scale square wave'

    if samples/sample_rate >= 1:
        length = str(samples/sample_rate) + ' seconds'
    else:
        length = str(samples/sample_rate*1000) + ' milliseconds'

    results = [
        "Using sound file backend '" + wav_loader + "'",
        'Properties for "' + filename + '"',
        str(file_format),
        'Channels:\t%d' % channels,
        'Sampling rate:\t%d Hz' % sample_rate,
        'Samples:\t%d' % samples,
        'Length: \t' + length,
        '-----------------',
        ]

    if channels == 1:
        # Monaural
        results += properties(signal, sample_rate)
    elif channels == 2:
        # Stereo
        if array_equal(signal[:, 0], signal[:, 1]):
            results += ['Left and Right channels are identical:']
            results += properties(signal[:, 0], sample_rate)
        else:
            results += ['Left channel:']
            results += properties(signal[:, 0], sample_rate)
            results += ['Right channel:']
            results += properties(signal[:, 1], sample_rate)
    else:
        # Multi-channel
        for ch_no, channel in enumerate(signal.transpose()):
            results += ['Channel %d:' % (ch_no + 1)]
            results += properties(channel, sample_rate)

    display(header, results)

    plot_histogram = False
    if plot_histogram:
        histogram(signal)
Example No. 12
from soundfile import SoundFile
import pyfftw
import time
import numpy as np
import matplotlib.pyplot as plt
from helpers import *

# Opening audio file, reading frame data and sample rate, and closing file
sf = SoundFile('source.wav')
data = sf.read()
f_s = sf.samplerate
sf.close()

start = time.time()

# Builds FFTW plan from input data
my_fft = pyfftw.builders.fft(data)

# Generating frequency values for each bin (frequencies range from 0 to the sample rate)
freq_bins = [i * (f_s / my_fft.N) for i in range(1, my_fft.N + 1)]

# Executing FFT
my_fft.execute()

# Finding magnitude of each frequency found
mags = np.abs(my_fft.output_array)

end = time.time()

# Showing the total time to plan and execute the FFT
print("Time to plan and execute: {}".format(end - start))
Example No. 13
class Decoder:
    """This class is an interface to read data from an audio file"""
    def __init__(self, file: str):
        """Creates an instance of a Decoder source with the given configuration

        Args:
            file (str): Input file name
        """
        self.__instance = SoundFile(file=file, mode='r')

    @property
    def sample_rate(self) -> int:
        """Return the sampling rate in Hz."""
        return self.__instance.samplerate

    @property
    def channels(self) -> int:
        """Return the number of channels."""
        return self.__instance.channels

    @property
    def file_name(self) -> str:
        """Return the file name."""
        return self.__instance.name

    @property
    def frames(self) -> int:
        """ Number of available frames"""
        return self.__instance.frames

    def done(self) -> bool:
        """Checks if there still data to read from the audio file"""
        return self.__instance.tell() < self.__instance.frames

    def start(self):
        """Starts the streaming"""
        self.__instance.seek(0)

    def stop(self):
        """Stops the streaming by seeking the file to the end"""
        self.__instance.seek(self.__instance.frames)

    def timestamp(self):
        """Returns the current streaming timestamp in seconds"""
        return self.__instance.tell() / self.__instance.samplerate

    def seek(self, frames: int):
        """Set the read position.

        Args:
            frames (int): The frame index or offset to seek

        Returns:
            The new absolute read/write position in frames
        """
        return self.__instance.seek(frames=frames)

    def read(self, frames_per_channel: int = -1):
        """Returns the buffer read from the audio file"""

        if frames_per_channel == -1:
            frames_per_channel = self.__instance.frames

        return self.__instance.read(frames=frames_per_channel,
                                    dtype='float32',
                                    always_2d=False)
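
A usage sketch for the Decoder class above, with a hypothetical input file:

dec = Decoder('example.wav')
print(dec.sample_rate, dec.channels, dec.frames)
dec.start()                                # rewind to the beginning
block = dec.read(frames_per_channel=1024)  # read the first 1024 frames
print(block.shape, dec.timestamp())        # position is now 1024 / sample_rate seconds
dec.stop()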
Example No. 14
def calc_drscore(filename):
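    """Compute per-channel DR (dynamic range), peak and RMS scores for an audio file.

    Relies on the module-level constants blocklenSec, RMSpercentage and NhighestPeak
    being defined elsewhere in the script. Prints a summary table and returns the
    DR, peak and RMS scores in dB (one value per channel).
    """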

    data = SoundFile(filename)

    NblockLen = round(blocklenSec * data.samplerate)
    NblockIdx = math.ceil(data.frames / NblockLen)
    Nchannels = data.channels

    RMS = np.zeros((NblockIdx, Nchannels))
    Pk = np.zeros((NblockIdx, Nchannels))

    for nn in range(NblockIdx):
        curData = np.array(data.read(NblockLen), ndmin=2)

        for cc in range(Nchannels):
            interim = 2 * (np.power(np.abs(curData[:, cc]), 2))

            RMS[nn, cc] = math.sqrt(interim.mean())
            Pk[nn, cc] = max(abs(curData[:, cc]))

    iUpmostBlocks = round(NblockIdx * RMSpercentage * 0.01)
    RMS.sort(axis=0)
    Pk.sort(axis=0)
    RMS[:] = RMS[::-1, :]
    Pk[:] = Pk[::-1, :]

    RMS_upmost = RMS[:iUpmostBlocks, :]
    RMS_total = np.sqrt((np.power(RMS, 2)).mean(axis=0))

    pre0 = np.power(RMS_upmost, 2).sum(axis=0)
    pre1 = np.repeat(iUpmostBlocks, Nchannels, axis=0)
    pre2 = np.sqrt(pre0 / pre1)

    DR_score = Pk[NhighestPeak - 1, :] / pre2
    RMS_score = RMS_total
    Peak_score = Pk[0, :]

    DR_score_log = 20 * np.log10(DR_score)
    RMS_score_log = 20 * np.log10(RMS_score)
    Peak_score_log = 20 * np.log10(Peak_score)

    print()
    print("DR analysis results:")
    print("====================")
    print(filename)
    print()
    print("     :  ", end="")
    for n in range(Nchannels):
        print(" Chann {0:2d}  :: ".format(n + 1), end="")
    print()
    print("Peak :  ", end="")
    for peak in Peak_score_log:
        print("{0:7.2f} dB :: ".format(peak), end="")
    print()
    print("RMS  :  ", end="")
    for rms in RMS_score_log:
        print("{0:7.2f} dB :: ".format(rms), end="")
    print()
    print("DR   :  ", end="")
    for dr in DR_score_log:
        print("{0:7.2f}    :: ".format(dr), end="")
    print()

    return DR_score_log, Peak_score_log, RMS_score_log