Exemplo n.º 1
0
def info_soundfile(fp):
    """Collect basic header metadata for an audio file.

    Args:
        fp: Audio file path (or any other source ``sf.info`` accepts).

    Returns:
        dict with the keys ``duration``, ``samples`` (number of frames),
        ``channels`` and ``sampling_rate``.
    """
    # Parse the header once instead of re-opening the file for every field.
    meta = sf.info(fp)
    return {
        'duration': meta.duration,
        # Take the exact frame count from the header: rebuilding it as
        # int(duration * samplerate) can come out one short after the float
        # round trip.
        'samples': meta.frames,
        'channels': meta.channels,
        'sampling_rate': meta.samplerate,
    }
Exemplo n.º 2
0
    def __getitem__(self, idx):
        """Load the (noisy, clean, vad) entry at ``idx`` as a ``NoisySpeech`` sample.

        When ``self.select`` is set it chooses the (start, stop) range to
        read; VAD timestamps are shifted so they stay relative to the start
        of the returned audio. ``self.transform`` is applied last, if set.
        """
        noisy_path, clean_path, vad_path = self.filepaths[idx]

        if self.select is None:
            nstart, nend = 0, None
        else:
            # The noisy and clean files can differ in length, so the range is
            # selected on whichever has fewer frames (the noisy one on a tie).
            noisy_is_longer = (sf.info(noisy_path).frames
                               > sf.info(clean_path).frames)
            nstart, nend = self.select(
                clean_path if noisy_is_longer else noisy_path)

        sigx, sr1 = audioread(noisy_path, start=nstart, stop=nend)
        sigs, sr2 = audioread(clean_path, start=nstart, stop=nend)
        assert sr1 == sr2

        # Trim whichever signal is longer so both have the same length.
        if len(sigx) > len(sigs):
            sigx = sigx[:len(sigs)]
        elif len(sigx) < len(sigs):
            sigs = sigs[:len(sigx)]

        # Shift the VAD intervals by the start offset, expressed in seconds.
        offset = nstart * 1. / sr1
        shifted_vad = [(ts - offset, te - offset)
                       for ts, te in self.tabread(vad_path)]

        sample = NoisySpeech(noisy=Audio(sigx, sr1),
                             clean=Audio(sigs, sr2),
                             vad=shifted_vad)

        if not self.transform:
            return sample
        return self.transform(sample)
Exemplo n.º 3
0
def load_wav(wavInPath, wavLength, printInfo=False):
    ''' Load an audio file as a floating point time series.
        wavInPath: path to .wav file
        wavLength: length of audio to load in seconds (0 = full length)
        printInfo: when true, prints details of the loaded .wav to screen

        returns: (xSrc, numChannels, fs, xLength, xSamples)
          xSrc        - audio data as returned by sf.read
          numChannels - channel count taken from the file header
          fs          - sample rate reported by sf.read
          xLength     - length of the loaded audio in seconds
          xSamples    - length of the loaded audio in samples

        Calls sys.exit() when wavLength exceeds the duration of the file.

        to unpack:
          if numChannels==1:
            xSrc_ch1 = xSrc

          elif numChannels==2:
            xSrc_ch1 = xSrc[:,0]
            xSrc_ch2 = xSrc[:,1] '''

    # Read the header once; it provides channels, sample rate and duration.
    srcInfo = sf.info(wavInPath)
    numChannels = srcInfo.channels

    # ** wavLength==0 - use full length of src .wav file **
    if wavLength == 0:
        xSrc, fs = sf.read(wavInPath)
        xSamples = len(xSrc)
        xLength = samples_to_time(xSamples, fs)[0]
    else:
        xLength = wavLength
        if xLength > srcInfo.duration:
            sys.exit(
                'ERROR: wavLength setting exceeds the length of audio source')
        xSamples = int(time_to_samples(xLength, srcInfo.samplerate))
        # No `channels=` argument here: soundfile only accepts it for RAW
        # files and raises TypeError otherwise. A single read also covers
        # files with more than two channels, which previously left xSrc unset.
        xSrc, fs = sf.read(wavInPath, frames=xSamples)

    if printInfo:
        # length of input signal - '0' => length of input .wav file
        print('number of Channels = ' + str(numChannels))
        print('length of input signal in seconds: ----- ' + str(xLength))
        print('length of input signal in samples: ----- ' + str(xSamples))
        print('audio sample rate: --------------------- ' + str(fs) + '\n')

    return xSrc, numChannels, fs, xLength, xSamples
Exemplo n.º 4
0
    def getSources(self, people, locations, duration):
        '''Build one ``Scene.Source`` per person in ``people["ID"]``.

        For each person a random entry of their directory is picked and its
        ``*.flac`` clips are concatenated, in sorted order, until at least
        ``duration`` seconds are collected. The audio is then cut to
        ``duration`` seconds and paired with ``locations[index]``.

        Raises IndexError when the chosen directory holds no ``*.flac`` file.
        '''
        sources = []
        for index, person in enumerate(people["ID"]):
            fullPath = f"{self.directory}/{person}"
            book = random.choice(os.listdir(fullPath))
            sentences = [
                clip.replace("\\", "/")
                for clip in sorted(glob.glob(fullPath + f"/{book}/*.flac"))
            ]
            name = f"s{person}-b{book}-d"

            firstClip = sentences.pop(0)
            currDur = sf.info(firstClip).duration
            data, sampRate = sf.read(firstClip)
            lastNum = "0000"
            for line in sentences:
                if currDur >= duration:
                    break
                currDur += sf.info(line).duration
                data = np.append(data, sf.read(line)[0])
                # isolate the line number from filename
                lastNum = line[-9:-5]

            # A slice bound must be an integer, so a fractional `duration`
            # is converted explicitly instead of raising TypeError.
            numSamples = int(sampRate * duration)
            sources.append(
                Scene.Source(location=locations[index],
                             name=name + lastNum,
                             data=(data[:numSamples], sampRate)))
        return sources
Exemplo n.º 5
0
 def check_audio_info(self, tar_path, ref_path):
     """
     Read the headers of the target and reference files and warn about
     anything that will be adjusted or may affect the result.

     Checks, in order: sampling rate (48kHz), matching formats, channel
     count (mono), equal durations, the upper duration threshold
     (``self.m_duration_thres_sec``), the 0.48 s minimum, and whether the two
     files are byte-identical.

     Sets ``IS_TOO_LONG``, ``IS_SAME_FILE``, ``m_tar_path``, ``m_ref_path``,
     ``m_tar_info`` and ``m_ref_info`` on the instance.
     """
     self.m_ref_path = ref_path
     self.m_tar_path = tar_path
     tar_info = sf.info(self.m_tar_path)
     ref_info = sf.info(self.m_ref_path)
     self.IS_TOO_LONG = False
     self.IS_SAME_FILE = False

     rate_messages = (
         (tar_info,
          "target signal sampling rate is not 48kHz, it will be resampled"),
         (ref_info,
          "reference signal sampling rate is not 48kHz, it will be resampled"),
     )
     for info, message in rate_messages:
         if info.samplerate != 48000:
             warnings.warn(message)

     if tar_info.format != ref_info.format:
         warnings.warn(
             "Target and reference signals are in different formats")

     channel_messages = (
         (tar_info,
          "target signal has channel number >= 2, it will be downmixed to mono"),
         (ref_info,
          "reference signal has channel number >= 2, it will be downmixed to mono"),
     )
     for info, message in channel_messages:
         if info.channels >= 2:
             warnings.warn(message)

     if tar_info.duration != ref_info.duration:
         warnings.warn(
             "Target and reference signals have different durations; longer one will be truncated"
         )

     # Both length checks look at the shorter of the two files.
     shortest = min(tar_info.duration, ref_info.duration)
     if shortest >= self.m_duration_thres_sec:
         self.IS_TOO_LONG = True
         warnings.warn(
             "Files are longer than %d seconds; using segmental SMAQ" %
             self.m_duration_thres_sec)
     if shortest <= 0.48:  # minimum duration due to VISQOL patch size
         warnings.warn(
             "Files should be at least 0.48 second; unreliable results might be returned"
         )

     # shallow=False forces a comparison of the file contents.
     if filecmp.cmp(self.m_tar_path, self.m_ref_path, shallow=False):
         self.IS_SAME_FILE = True
         warnings.warn("Files are bit-for-bit identical")
     print("=============================================================")
     self.m_tar_info = tar_info
     self.m_ref_info = ref_info
Exemplo n.º 6
0
 def btn_open_test_file(self):
     """
     Callback for the GUI button that opens a test audio file.

     Shows a file dialog. An MP3 selection is first converted to a WAV file
     with the same base name (an existing WAV is reused); if the conversion
     fails a message box is shown and nothing is loaded. Otherwise the file
     is opened and the widgets that depend on the sampling rate are updated.
     """
     dlg = QFileDialog()
     dlg.setFileMode(QFileDialog.AnyFile)
     dlg.setNameFilters([
         "WAV files (*.wav)",
         "MP3 (*.mp3)",
         #"MP4 (*.mp4)",
         "FLAC (*.flac)",
         "OGG (*.ogg)",
         "Other Audio Formats (*.AIFF *.AU *.RAW)"
     ])
     if not dlg.exec_():
         return

     filename = dlg.selectedFiles()[0]
     fname, ext = os.path.splitext(filename)
     if ext.lower() == ".mp3":
         try:
             print("Converting mp3 to wav")
             new_wav = fname + ".wav"
             already_there = (os.path.isfile(new_wav)
                              or new_wav in self.converted_mp3)
             if not already_there:
                 sound = AudioSegment.from_mp3(filename)
                 sound.export(new_wav, format="wav")
                 self.converted_mp3 += [new_wav]
             filename = new_wav
         except Exception:
             # `Exception` rather than a bare except, so KeyboardInterrupt
             # and SystemExit are not swallowed by the conversion fallback.
             filename = ""  # signals "nothing to load" below
             msg = QMessageBox()
             msg.setIcon(QMessageBox.Information)
             msg.setText("Could not convert mp3. Try installing ffmpeg")
             msg.setWindowTitle("Warning")
             msg.setStandardButtons(QMessageBox.Ok)
             msg.exec()

     if len(filename) > 0:
         self.txtTestFile.setText(filename)
         if self.wf is not None:
             self.wf.close()
         self.wf = sf.SoundFile(filename)
         self.wf_info = sf.info(filename)
         # Reuse the header that was just read instead of parsing it again.
         self.fs = self.wf_info.samplerate
         self.combo_setindex_by_value(self.comboFS, self.fs)
         self.aa.set_properties(fs=self.fs)
         self.canvas.set_plot_properties(fs=self.fs)
         self.sliderWavPlayer.setValue(self.sliderWavPlayer.minimum())
         self.update_player_timelabel(0)
Exemplo n.º 7
0
    def __init__(self, playback_sound=None, rate=44100, chunk_size=1024):
        """Set up PyAudio and the recording parameters.

        Args:
            playback_sound: Optional audio file to play. When given (and
                truthy), the recording time and the sample rate are taken
                from that file, overriding ``rate``.
            rate: Sample rate used when no playback sound is given.
            chunk_size: Stored as ``self.chunk_size``.
        """
        self.pa = pyaudio.PyAudio()

        self.rate = rate
        self.chunk_size = chunk_size
        self.FORMAT = pyaudio.paInt16
        self.play_sound = False

        # `None` replaces the former mutable `[]` default; both are falsy,
        # so omitting the argument behaves exactly as before.
        if playback_sound:
            self.play_sound = True
            self.playback_sound = playback_sound
            # Read the header once for both duration and sample rate.
            sound_info = sf.info(playback_sound)
            self.recording_time = sound_info.duration
            print(self.recording_time)
            self.rate = sound_info.samplerate
Exemplo n.º 8
0
def main(args):
    """Write ``train.tsv`` (and optionally ``valid.tsv``) manifests.

    Every file under ``args.root`` matching ``**/*.<args.ext>`` is listed as
    ``<path relative to root>\\t<frame count>``. The first line of each
    manifest is the resolved root directory. Each file goes to the
    validation manifest with probability ``args.valid_percent``, using a
    random generator seeded with ``args.seed``.

    Args:
        args: Namespace providing ``root``, ``dest``, ``ext``, ``seed``,
            ``valid_percent`` (within [0, 1]) and ``path_must_contain``.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    assert 0 <= args.valid_percent <= 1.0

    os.makedirs(args.dest, exist_ok=True)

    dir_path = os.path.realpath(args.root)
    search_path = os.path.join(dir_path, "**/*." + args.ext)
    rand = random.Random(args.seed)

    # ExitStack closes both manifests even if an exception is raised while
    # scanning; previously valid.tsv was only closed on the success path.
    with ExitStack() as stack:
        valid_f = (stack.enter_context(
            open(os.path.join(args.dest, "valid.tsv"), "w"))
                   if args.valid_percent > 0 else None)
        train_f = stack.enter_context(
            open(os.path.join(args.dest, "train.tsv"), "w"))

        print(dir_path, file=train_f)
        if valid_f is not None:
            print(dir_path, file=valid_f)

        for fname in glob.iglob(search_path, recursive=True):
            file_path = os.path.realpath(fname)

            if args.path_must_contain and args.path_must_contain not in file_path:
                continue

            frames = soundfile.info(fname).frames
            # Always draw, so the random sequence is the same as before.
            draw = rand.random()
            # Without a validation manifest everything goes to train; this
            # also covers a draw of exactly 0.0, which used to select `None`
            # and send the line to stdout.
            if valid_f is None or draw > args.valid_percent:
                dest = train_f
            else:
                dest = valid_f
            print("{}\t{}".format(os.path.relpath(file_path, dir_path),
                                  frames),
                  file=dest)
Exemplo n.º 9
0
    def from_file(path: Pathlike, recording_id: Optional[str] = None):
        """
        Build a ``Recording`` from the header of a single audio file.

        Use this when one physical file corresponds to one recording session.
        For sessions spread over several files (e.g. one per channel), create
        the ``Recording`` by hand with one ``AudioSource`` per file instead.

        :param path: Path to an audio file supported by libsoundfile (pysoundfile).
        :param recording_id: recording id; when omitted, the filename's stem is used ("x.wav" -> "x").
        :return: a new ``Recording`` instance pointing to the audio file.
        """
        import soundfile

        header = soundfile.info(path)
        if recording_id is None:
            recording_id = Path(path).stem
        # A single file-backed source exposing every channel in the file.
        file_source = AudioSource(
            type='file',
            channels=list(range(header.channels)),
            source=str(path)
        )
        return Recording(
            id=recording_id,
            sampling_rate=header.samplerate,
            num_samples=header.frames,
            duration=header.duration,
            sources=[file_source]
        )
Exemplo n.º 10
0
def get_num_segments(path, max_segment_length, min_segment_length):
    """
    Count the audio segments of sufficient length contained in an audio file.

    Args:
        path: Path to a single audio file
        max_segment_length (float): The maximum length (in seconds) of each audio segment. If `None`, 1 segment is assumed.
        min_segment_length (float): The minimum length (in seconds) of each audio segment.

    Returns:
        0 if the file header cannot be read, 1 if segmenting is disabled,
        otherwise the number of full segments plus one more when the
        remainder is at least `min_segment_length`.
    """
    # The header is read even when segmenting is disabled, so unreadable
    # files are still reported as containing no segments.
    try:
        header = sf.info(str(path))
    except RuntimeError:  # SoundFile raises a `RuntimeError` when it fails to read a file
        return 0

    if max_segment_length is None:
        return 1

    duration = header.frames / header.samplerate
    full_segments = int(duration / max_segment_length)
    leftover = duration % max_segment_length
    if leftover >= min_segment_length:
        return full_segments + 1
    return full_segments
Exemplo n.º 11
0
 def select_wav_file(self):
     '''
     Lets the user pick a wav/flac file and, if one is chosen, loads its
     header and initialises the channel and time widgets from it.
     '''
     chosen = select_file(self, ['wav', 'flac'])
     if chosen is None:
         self.__signal = None
         return

     self.__clear()
     self.file.setText(chosen)
     import soundfile as sf
     self.__info = sf.info(chosen)

     # One selector entry per channel, numbered from 1.
     channel_count = self.__info.channels
     self.channelSelector.clear()
     for channel_number in range(1, channel_count + 1):
         self.channelSelector.addItem(f"{channel_number}")
     self.channelSelector.setEnabled(channel_count > 1)

     midnight = QtCore.QTime(0, 0, 0)
     self.startTime.setTime(midnight)
     self.startTime.setEnabled(True)

     # File duration in whole milliseconds, used as the upper time limit.
     self.__duration = math.floor(self.__info.duration * 1000)
     max_time = QtCore.QTime(0, 0, 0).addMSecs(self.__duration)
     self.endTime.setMaximumTime(max_time)
     self.endTime.setTime(max_time)
     self.endTime.setEnabled(True)
     self.maxTime.setMaximumTime(max_time)
     self.maxTime.setTime(max_time)

     self.loadButton.setEnabled(True)
     self.updateChart.setEnabled(True)
Exemplo n.º 12
0
    def _vqt_fn(self, wav_file, err_db=False):
        """Compute coefficients for ``wav_file`` with the MATLAB ``vqt_fn``.

        When ``err_db`` is truthy a second output is requested from MATLAB,
        asserted to be at least 285 and logged. The result is checked to
        have shape ``(num_frames, 336)``, with ``num_frames`` derived from
        the file's frame count, and returned as a float32 array.
        """
        with matlab.engine.start_matlab(option='-nojvm -nodesktop') as mat_eng:
            if err_db:
                coeffs, err_db = mat_eng.vqt_fn('wav_file',
                                                wav_file,
                                                nargout=2)
                assert err_db >= 285.
                logging.info('vqt accuracy - {} dB'.format(err_db))
            else:
                coeffs = mat_eng.vqt_fn('wav_file', wav_file)
            # Rebuild the MATLAB array as float32, filling in column-major
            # (Fortran) order.
            matlab_shape = coeffs.size
            coeffs = np.array(coeffs._data, dtype=np.float32)
            coeffs = coeffs.reshape(matlab_shape, order='F')

        wav_info = soundfile.info(wav_file)
        sr = 44100
        assert wav_info.samplerate >= sr

        expected = wav_info.frames
        if wav_info.samplerate > sr:
            # Frame count scaled to 44100 Hz, rounded up.
            expected = (expected * sr + wav_info.samplerate -
                        1) // wav_info.samplerate
        # Two successive round-up divisions, by 64 and then by 22.
        for divisor in (64, 22):
            expected = (expected + divisor - 1) // divisor

        assert coeffs.shape == (expected, 336)
        return np.require(coeffs, dtype=np.float32, requirements=['C', 'O'])
Exemplo n.º 13
0
 def _setSndFromFile(self, filename):
     """Open ``filename`` as the sound source and derive playback timing.

     Sets ``sndFile``, ``sourceType``, ``sampleRate``, ``t``, ``duration``
     and ``durationFrames``, and ``channels`` when it was -1 (auto). With
     ``preBuffer == -1`` the file is read into memory, closed, and handed
     to ``_setSndFromArray``.
     """
     self.sndFile = f = sf.SoundFile(filename)
     self.sourceType = 'file'
     self.sampleRate = f.samplerate
     if self.channels == -1:  # if channels was auto then set to file val
         self.channels = f.channels
     fileDuration = sf.info(filename).duration
     # process start time
     if self.startTime and self.startTime > 0:
         startFrame = self.startTime*self.sampleRate
         self.sndFile.seek(int(startFrame))
         self.t = self.startTime
     else:
         self.t = 0
     # Only the part of the file after the start offset can be played.
     remainingDur = fileDuration - self.t
     # process stop time
     if self.stopTime and self.stopTime > 0:
         requestedDur = self.stopTime - self.t
         # Cap by what is left after the start offset, not by the whole
         # file length, which overshot whenever startTime > 0.
         self.duration = min(requestedDur, remainingDur)
     else:
         self.duration = remainingDur
     # can now calculate duration in frames
     self.durationFrames = int(round(self.duration*self.sampleRate))
     # are we preloading or streaming?
     if self.preBuffer == 0:
         # no buffer - stream from disk on each call to nextBlock
         pass
     elif self.preBuffer == -1:
         # full preload - read from the current position, then use the
         # in-memory array as the source
         sndArr = self.sndFile.read(frames=len(self.sndFile))
         self.sndFile.close()
         self._setSndFromArray(sndArr)
Exemplo n.º 14
0
def get_samplerate(path):
    '''Return the sampling rate of an audio file.

    The header is read with `soundfile` first; if that raises a
    `RuntimeError`, `audioread` is used instead.

    Parameters
    ----------
    path : string, int, or file-like
        The path to the file to be loaded
        As in `load()`, this can also be an integer or open file-handle
        that can be processed by `soundfile`.

    Returns
    -------
    sr : number > 0
        The sampling rate of the given audio file

    Examples
    --------
    Get the sampling rate for the included audio file

    >>> path = librosa.util.example_audio_file()
    >>> librosa.get_samplerate(path)
    44100
    '''
    try:
        header = sf.info(path)
    except RuntimeError:
        # soundfile could not read the file - fall back to audioread
        with audioread.audio_open(path) as fallback:
            return fallback.samplerate
    else:
        return header.samplerate
Exemplo n.º 15
0
def get_duration(path: Pathlike, ) -> float:
    """
    Return the duration of an audio file or of the output of a Kaldi-style pipe.

    Pipe inputs (paths ending with "|") are read with ``kaldiio``; regular
    files are parsed with pysoundfile, falling back to ``audioread_info``
    when that fails.

    :param path: Path to an audio file or a Kaldi-style pipe.
    :return: float duration of the recording, in seconds.
    :raises ValueError: for a pipe input when ``kaldiio`` is not installed.
    """
    path = str(path)
    if path.strip().endswith("|"):
        if not is_module_available("kaldiio"):
            raise ValueError(
                "To read Kaldi's data dir where wav.scp has 'pipe' inputs, "
                "please 'pip install kaldiio' first.")
        from kaldiio import load_mat

        # Note: kaldiio.load_mat returns
        # (sampling_rate: int, samples: 1-D np.array[int])
        sampling_rate, samples = load_mat(path)
        assert len(samples.shape) == 1
        return samples.shape[0] / sampling_rate
    try:
        # Try to parse the file using pysoundfile first.
        import soundfile

        info = soundfile.info(path)
    except Exception:
        # `Exception` rather than a bare except: a missing soundfile or an
        # unreadable file still falls back to audioread, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        info = audioread_info(path)
    return info.duration
Exemplo n.º 16
0
    def __init__(self,
                 path="None",
                 is_wav=False,
                 stem_id=None,
                 subset=None,
                 chunk_start=0,
                 chunk_duration=None):
        """Store the track settings and load the file metadata.

        If ``path`` exists, ``info``, ``samples``, ``duration`` and ``rate``
        are read with ``sf.info`` (when ``is_wav``) or ``stempeg.Info``
        (otherwise, for ``stem_id``). If it does not exist they are all
        ``None``.
        """
        self.path = path
        self.subset = subset
        self.stem_id = stem_id
        self.is_wav = is_wav

        self.chunk_start = chunk_start
        self.chunk_duration = chunk_duration

        # load and store metadata
        if not os.path.exists(self.path):
            # set to `None` if no path was set (fake file)
            self.info = self.samples = self.duration = self.rate = None
        elif self.is_wav:
            self.info = sf.info(self.path)
            self.samples = self.info.frames
            self.duration = self.info.duration
            self.rate = self.info.samplerate
        else:
            self.info = stempeg.Info(self.path)
            self.samples = int(self.info.samples(self.stem_id))
            self.duration = self.info.duration(self.stem_id)
            self.rate = self.info.rate(self.stem_id)

        self._audio = None
Exemplo n.º 17
0
def sampling_rate(file: str) -> int:
    """Sampling rate of audio file.

    Files whose extension is listed in ``SNDFORMATS`` are read with
    ``soundfile``. Everything else is queried with ``sox`` and, if that
    fails, with the ``mediainfo`` command line tool.

    Args:
        file: file name of input audio file

    Returns:
        sampling rate of audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)
    if file_extension(file) in SNDFORMATS:
        return soundfile.info(file).samplerate

    try:
        return int(sox.file_info.sample_rate(file))
    except sox.core.SoxiError:
        reported = run(f'mediainfo --Inform="Audio;%SamplingRate%" "{file}"')
        # Falsy output is treated as "could not be determined".
        if not reported:
            raise RuntimeError(broken_file_error(file))
        return int(reported)
Exemplo n.º 18
0
def channels(file: str) -> int:
    """Number of channels in audio file.

    Files whose extension is listed in ``SNDFORMATS`` are read with
    ``soundfile``. Everything else is queried with ``sox`` and, if that
    fails, with two ``mediainfo`` queries tried in turn.

    Args:
        file: file name of input audio file

    Returns:
        number of channels in audio file

    Raises:
        RuntimeError: if ``file`` is broken or not a supported format

    """
    file = audeer.safe_path(file)
    if file_extension(file) in SNDFORMATS:
        return soundfile.info(file).channels

    try:
        return int(sox.file_info.channels(file))
    except sox.core.SoxiError:
        # For MP4 stored and returned number of channels can be different,
        # so the "original" value is tried before the plain one.
        original_query = (
            f'mediainfo --Inform="Audio;%Channel(s)_Original%" "{file}"'
        )
        plain_query = f'mediainfo --Inform="Audio;%Channel(s)%" "{file}"'
        try:
            return int(run(original_query))
        except ValueError:
            try:
                return int(run(plain_query))
            except ValueError:
                raise RuntimeError(broken_file_error(file))
Exemplo n.º 19
0
 def add_random_background(self, label=None):
     """ Add a randomly chosen background file to the scaper object
     Args:
         label: str or list, possible labels are names the subfolders of self.bg_path. None can use them all.
     Raises:
         NotImplementedError: if label is neither None, a list nor a str.
     """
     if label is None:
         # wildcard: any subfolder may be used
         bg_label = "*"
     elif isinstance(label, list):
         bg_label = self.random_state.choice(label)
     elif isinstance(label, str):
         bg_label = label
     else:
         raise NotImplementedError(
             "Background label can only be a list of available labels or a string"
         )

     chosen_file = self._choose_file(osp.join(self.bg_path, bg_label))
     file_duration = sf.info(chosen_file).duration

     # Random start time, capped at (file duration - self.duration), and at
     # 0 when the file is shorter than self.duration.
     random_start = self.random_state.rand() * file_duration
     latest_start = max(file_duration - self.duration, 0)
     starting_source = min(random_start, latest_start)

     # The label passed on is the name of the folder containing the file.
     folder_name = chosen_file.split("/")[-2]
     self.add_background(
         label=("const", folder_name),
         source_file=("const", chosen_file),
         source_time=("const", starting_source),
     )
Exemplo n.º 20
0
    def __init__(self, *args, **kwargs):
        """Constructor

        Parameters
        ----------
        data : optional
            Audio data itself.
            Default value "None"
        fs : int
            Target sampling frequency, if loaded audio does have different sampling frequency, audio will be re-sampled.
            Default value "44100"
        mono : bool
            Monophonic target, multi-channel audio will be down-mixed.
            Default value "True"
        filename : str, optional
            File path. When given, the file format is detected and, for
            'wav' files, the header information is stored in ``self.info``.
        logger : logger
            Logger class instance, if none given the module logger is used.
        """

        self.data = kwargs.get('data')

        self.filename = kwargs.get('filename')
        if self.filename:
            self.format = self.detect_file_format(self.filename)
            # Header information is only read for wav files.
            if self.format == 'wav':
                self.info = soundfile.info(file=self.filename)

        default_logger = logging.getLogger(__name__)
        self.logger = kwargs.get('logger', default_logger)
        if not self.logger.handlers:
            # No handler attached to this logger: set up basic logging.
            logging.basicConfig()

        self.fs = kwargs.get('fs', 44100)
        self.mono = kwargs.get('mono', True)
Exemplo n.º 21
0
    def maps_sg_and_label_fn(wav_file):
        """
        Load the stored HCQT array of a recording and generate its labels.

        The HCQT file is looked up in the directory named by the
        ``maps_hcqt`` environment variable, and the MIDI file is expected
        next to ``wav_file``. Returns ``dict(sg=hcqt, label=label)`` with
        both arrays marked read-only.
        """
        wav_info = soundfile.info(wav_file)
        assert wav_info.samplerate == 44100
        num_frames = MiscFns.num_samples_to_num_frames_fn(wav_info.frames)

        # Recording name = file name without its 4-character extension.
        rec_name = os.path.basename(wav_file)[:-4]
        hcqt_dir = os.environ['maps_hcqt']
        stored_name, hcqt = MiscFns.load_np_array_from_file_fn(
            os.path.join(hcqt_dir, rec_name + '.hcqt'))
        assert stored_name == rec_name

        # The stored array may hold one frame more than expected; if so the
        # first frame is dropped.
        stored_frames = hcqt.shape[0]
        assert stored_frames == num_frames or stored_frames == num_frames + 1
        if stored_frames > num_frames:
            hcqt = hcqt[1:]
        assert hcqt.shape == (num_frames, 440, 6) and hcqt.dtype == np.float32

        # Limit the frame count to the one derived from the MIDI length
        # (presumably seconds - confirm against mido) plus two frames.
        mid_file = wav_file[:-3] + 'mid'
        midi_samples = int(
            np.ceil(mido.MidiFile(mid_file).length * wav_info.samplerate))
        midi_frames = MiscFns.num_samples_to_num_frames_fn(midi_samples) + 2
        num_frames = min(num_frames, midi_frames)
        hcqt = hcqt[:num_frames]

        label = MiscFns.label_fn(mid_file_name=mid_file, num_frames=num_frames)

        hcqt = np.require(hcqt, dtype=np.float32, requirements=['O', 'C'])
        hcqt.flags['WRITEABLE'] = False
        label.flags['WRITEABLE'] = False

        return dict(sg=hcqt, label=label)
Exemplo n.º 22
0
def get_samplerate(path):
    '''Return the sampling rate of an audio file.

    The header is read with `soundfile` first; if that raises a
    `RuntimeError`, `audioread` is used instead.

    Parameters
    ----------
    path : string, int, or file-like
        The path to the file to be loaded
        As in `load()`, this can also be an integer or open file-handle
        that can be processed by `soundfile`.

    Returns
    -------
    sr : number > 0
        The sampling rate of the given audio file

    Examples
    --------
    Get the sampling rate for the included audio file

    >>> path = librosa.util.example_audio_file()
    >>> librosa.get_samplerate(path)
    44100
    '''
    try:
        header = sf.info(path)
    except RuntimeError:
        # soundfile could not read the file - fall back to audioread
        with audioread.audio_open(path) as fallback:
            return fallback.samplerate
    else:
        return header.samplerate
Exemplo n.º 23
0
    def _vqt_without_shift_fn(self, wav_file, err_db=False):
        """Compute coefficients with the MATLAB ``vqt_without_pitch_shift_fn``.

        When ``err_db`` is truthy a second output is requested from MATLAB,
        asserted to be at least 290 and logged. If anything fails inside the
        MATLAB session, ``free -g`` is run before the error is re-raised.
        The result is checked to have shape ``(num_frames, 336, 2)`` and
        returned as a float32 array.
        """
        matlab_args = [
            'db_scale', False,
            'mono', False,
            'wav_file', wav_file
        ]
        with matlab.engine.start_matlab(option='-nojvm -nodesktop') as mat_eng:
            try:
                if err_db:
                    coeffs, err_db = mat_eng.vqt_without_pitch_shift_fn(
                        *matlab_args, nargout=2)
                    assert err_db >= 290.
                    logging.info('vqt accuracy - {} dB'.format(err_db))
                else:
                    coeffs = mat_eng.vqt_without_pitch_shift_fn(*matlab_args)
                # Rebuild the MATLAB array as float32, filling in
                # column-major (Fortran) order.
                matlab_shape = coeffs.size
                coeffs = np.array(coeffs._data, dtype=np.float32)
                coeffs = coeffs.reshape(matlab_shape, order='F')
            except Exception as failure:
                # Print memory usage to help diagnose the failure.
                os.system('free -g')
                raise failure

        wav_info = soundfile.info(wav_file)
        sr = 44100
        assert wav_info.samplerate == sr
        # Two successive round-up divisions, by 64 and then by 22.
        blocks = np.ceil(wav_info.frames / 64.)
        num_frames = int(np.ceil(blocks / 22.))
        assert coeffs.shape == (num_frames, 336, 2)

        return np.require(coeffs, dtype=np.float32, requirements=['C', 'O'])
Exemplo n.º 24
0
def get_duration(
    path: Pathlike,
) -> float:
    """
    Return the duration of an audio file or of the output of a Kaldi-style pipe.

    Pipe inputs (paths ending with "|") are read with ``kaldi_native_io``;
    regular files are parsed with pysoundfile, falling back to
    ``audioread_info`` when that fails.

    :param path: Path to an audio file or a Kaldi-style pipe.
    :return: float duration of the recording, in seconds.
    :raises ValueError: for a pipe input when ``kaldi_native_io`` is not installed.
    """
    path = str(path)
    if path.strip().endswith("|"):
        if not is_module_available("kaldi_native_io"):
            raise ValueError(
                "To read Kaldi's data dir where wav.scp has 'pipe' inputs, "
                "please 'pip install kaldi_native_io' first."
            )
        import kaldi_native_io

        wave = kaldi_native_io.read_wave(path)
        assert wave.data.shape[0] == 1, f"Expect 1 channel. Given {wave.data.shape[0]}"

        return wave.duration
    try:
        # Try to parse the file using pysoundfile first.
        import soundfile

        info = soundfile.info(path)
    except Exception:
        # `Exception` rather than a bare except: a missing soundfile or an
        # unreadable file still falls back to audioread, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        info = audioread_info(path)
    return info.duration
Exemplo n.º 25
0
 def preprocess_source(self, source):
     """Read only the header of ``source`` and return ``source`` unchanged.

     Stores the header in ``self.audio_info`` and its sample rate in
     ``self.sample_rate``, and resets ``self.samples`` to an empty list.
     The audio itself is read when the first read request happens.
     """
     header = soundfile.info(source)
     self.audio_info = header
     self.sample_rate = header.samplerate
     self.samples = []
     return source
Exemplo n.º 26
0
 def _setSndFromFile(self, filename):
     """Open ``filename`` as the sound source and derive playback timing.

     Sets ``sndFile``, ``sourceType``, ``sampleRate``, ``t``, ``duration``
     and ``durationFrames``, and ``channels`` when it was -1 (auto). With
     ``preBuffer == -1`` the file is read into memory, closed, and handed
     to ``_setSndFromArray``.
     """
     self.sndFile = f = sf.SoundFile(filename)
     self.sourceType = 'file'
     self.sampleRate = f.samplerate
     if self.channels == -1:  # if channels was auto then set to file val
         self.channels = f.channels
     fileDuration = sf.info(filename).duration
     # process start time
     if self.startTime and self.startTime > 0:
         startFrame = self.startTime * self.sampleRate
         self.sndFile.seek(int(startFrame))
         self.t = self.startTime
     else:
         self.t = 0
     # Only the part of the file after the start offset can be played.
     remainingDur = fileDuration - self.t
     # process stop time
     if self.stopTime and self.stopTime > 0:
         requestedDur = self.stopTime - self.t
         # Cap by what is left after the start offset, not by the whole
         # file length, which overshot whenever startTime > 0.
         self.duration = min(requestedDur, remainingDur)
     else:
         self.duration = remainingDur
     # can now calculate duration in frames
     self.durationFrames = int(round(self.duration * self.sampleRate))
     # are we preloading or streaming?
     if self.preBuffer == 0:
         # no buffer - stream from disk on each call to nextBlock
         pass
     elif self.preBuffer == -1:
         # full preload - read from the current position, then use the
         # in-memory array as the source
         sndArr = self.sndFile.read(frames=len(self.sndFile))
         self.sndFile.close()
         self._setSndFromArray(sndArr)
Exemplo n.º 27
0
    def maps_sg_and_label_fn(wav_file):
        """
        Load the stored STFT spectrogram of a recording and generate its labels.

        The STFT file is looked up in the directory named by the
        ``maps_stft`` environment variable, and the MIDI file is expected
        next to ``wav_file``. Returns ``dict(sg=stft, label=label)`` with
        both arrays marked read-only.
        """
        wav_info = soundfile.info(wav_file)
        assert wav_info.samplerate == 44100
        num_frames = MiscFns.num_samples_to_num_frames_fn(wav_info.frames)

        # Recording name = file name without its 4-character extension.
        rec_name = os.path.basename(wav_file)[:-4]
        stft_dir = os.environ['maps_stft']
        _stored_name, stft = MiscFns.load_np_array_from_file_fn(
            os.path.join(stft_dir, rec_name + '.stft'))
        assert stft.shape == (num_frames, 2817) and stft.dtype == np.float32

        # Limit the frame count to the one derived from the MIDI length
        # (presumably seconds - confirm against mido) plus two frames.
        mid_file = wav_file[:-3] + 'mid'
        midi_samples = int(
            np.ceil(mido.MidiFile(mid_file).length * wav_info.samplerate))
        midi_frames = MiscFns.num_samples_to_num_frames_fn(midi_samples) + 2
        num_frames = min(num_frames, midi_frames)
        stft = stft[:num_frames]

        label = MiscFns.label_fn(mid_file_name=mid_file, num_frames=num_frames)

        stft = np.require(stft, dtype=np.float32, requirements=['O', 'C'])
        stft.flags['WRITEABLE'] = False
        label.flags['WRITEABLE'] = False

        return dict(sg=stft, label=label)
Exemplo n.º 28
0
def readWav(inputSignalFile,
            selectedChannel=1,
            start=None,
            end=None) -> Signal:
    """ reads a wav file into a Signal.
    :param inputSignalFile: a path to the input signal file
    :param selectedChannel: the channel to read, counted from 1. Ignored for
        single channel files.
    :param start: the time to start reading from in HH:mm:ss.SSS format.
    :param end: the time to end reading from in HH:mm:ss.SSS format.
    :returns: Signal holding the samples of the selected channel only.
    :raises ValueError: if selectedChannel does not exist in the file.
    """
    def asFrames(time, fs):
        """ converts a HH:mm:ss.SSS timestamp into a frame index at rate fs. """
        hours, minutes, seconds = (time.split(":"))[-3:]
        # round rather than truncate: 1000 * 1.001 is 1000.9999999999999 in
        # floating point and must still count as 1001 ms
        millis = (3600000 * int(hours)) + (60000 * int(minutes)) \
            + int(round(1000 * float(seconds)))
        # integer arithmetic so that no frame is lost to float rounding
        return int(millis * fs // 1000)

    import soundfile as sf
    if start is not None or end is not None:
        info = sf.info(inputSignalFile)
        startFrame = 0 if start is None else asFrames(start, info.samplerate)
        endFrame = None if end is None else asFrames(end, info.samplerate)
        ys, frameRate = sf.read(inputSignalFile,
                                start=startFrame,
                                stop=endFrame)
    else:
        ys, frameRate = sf.read(inputSignalFile)

    # soundfile returns a (frames, channels) array for multi channel files and
    # a flat array for mono; pick one column instead of striding over frames.
    if ys.ndim > 1:
        if not 1 <= selectedChannel <= ys.shape[1]:
            raise ValueError(
                'selectedChannel={} is not in 1..{}'.format(selectedChannel,
                                                            ys.shape[1]))
        ys = ys[:, selectedChannel - 1]
    return Signal(ys, frameRate)
Exemplo n.º 29
0
 def __init__(self, parent, dirList, fileList, scanMode, formats, sampleRates, channels, scanLimits, tag):
     """Build the import dialog and fill the sample model from ``fileList``.

     When ``dirList`` is non-empty the dialog is initialised as an
     ``ImportDialogScan`` and ``tag`` becomes its only default tag; otherwise
     it is initialised as a plain ``ImportDialog``.  Every path in
     ``fileList`` that ``soundfile`` can read gets one row in
     ``self.sampleModel``; paths it cannot read are collected in a local
     ``unknownFiles`` list and skipped.
     """
     if dirList:
         ImportDialogScan.__init__(self, parent, dirList, scanMode, formats, sampleRates, channels, scanLimits)
         self.defaultTags = [tag]
     else:
         ImportDialog.__init__(self, parent)
     unknownFiles = []
     self.dirList = dirList
     for filePath in fileList:
         try:
             info = soundfile.info(filePath)
         except Exception:
             # Unreadable or unsupported file: remember it and move on.
             # (Not a bare except, so Ctrl-C / SystemExit still propagate.)
             unknownFiles.append(filePath)
             continue
         fileInfo = QtCore.QFileInfo(filePath)
         # First column: checkable file name carrying the path and the info object.
         fileItem = QtGui.QStandardItem(fileInfo.fileName())
         fileItem.setData(filePath, FilePathRole)
         fileItem.setData(info, InfoRole)
         fileItem.setToolTip(fileInfo.fileName())
         fileItem.setCheckable(True)
         fileItem.setCheckState(QtCore.Qt.Checked)
         dirItem = QtGui.QStandardItem(fileInfo.absolutePath())
         dirItem.setToolTip(fileInfo.absoluteFilePath())
         # Length in seconds, computed from the frame count and the sample rate.
         lengthItem = QtGui.QStandardItem('{:.3f}'.format(float(info.frames) / info.samplerate))
         formatItem = QtGui.QStandardItem(info.format)
         rateItem = QtGui.QStandardItem(str(info.samplerate))
         channelsItem = QtGui.QStandardItem(str(info.channels))
         subtypeItem = QtGui.QStandardItem(info.subtype)
         tagsItem = QtGui.QStandardItem()
         tagsItem.setData([tag], TagsRole)
         self.sampleModel.appendRow([fileItem, dirItem, lengthItem, formatItem, rateItem, channelsItem, subtypeItem, tagsItem])
Exemplo n.º 30
0
def get_zip_manifest(
        zip_path: Path, zip_root: Optional[Path] = None, is_audio=False
):
    """Index the members of a ZIP archive by byte range and length.

    Args:
        zip_path: path of the archive, relative to ``zip_root`` when given.
            This (not the joined path) is what ends up in the manifest.
        zip_root: optional directory that ``zip_path`` is joined onto.
        is_audio: if True members are checked and measured as audio via
            ``soundfile``; otherwise they are treated as ``.npy`` arrays.

    Returns:
        ``(paths, lengths)``, two dicts keyed by the stem of each member name.
        ``paths[id]`` is ``"<zip_path>:<offset>:<size>"`` and ``lengths[id]``
        is the number of audio frames, or the first dimension of the array.

    Raises:
        AssertionError: if a member is shorter than two bytes or does not
            look like the expected kind of data.
    """
    _zip_path = Path.joinpath(zip_root or Path(""), zip_path)
    with zipfile.ZipFile(_zip_path, mode="r") as f:
        info = f.infolist()
    paths, lengths = {}, {}
    # Open the archive once as a plain binary file and seek per member,
    # instead of reopening it on every iteration.
    with open(_zip_path, "rb") as raw:
        for i in tqdm(info):
            utt_id = Path(i.filename).stem
            # 30 bytes is the fixed part of a ZIP local file header, followed
            # by the file name.  NOTE(review): this assumes stored
            # (uncompressed) members, no extra field and a file name whose
            # byte length equals len(i.filename); the asserts below catch
            # archives that break that assumption.
            offset, file_size = i.header_offset + 30 + len(i.filename), i.file_size
            paths[utt_id] = f"{zip_path.as_posix()}:{offset}:{file_size}"
            raw.seek(offset)
            byte_data = raw.read(file_size)
            assert len(byte_data) > 1
            byte_data_fp = io.BytesIO(byte_data)
            if is_audio:
                assert is_sf_audio_data(byte_data), i
                lengths[utt_id] = sf.info(byte_data_fp).frames
            else:
                assert is_npy_data(byte_data), i
                lengths[utt_id] = np.load(byte_data_fp).shape[0]
    return paths, lengths
Exemplo n.º 31
0
def main(args):
    """Write ``train.tsv`` / ``valid.tsv`` manifests for files under ``args.root``.

    Both manifests start with the resolved root directory.  Every file matching
    ``**/84-*.<args.ext>`` below that root (and containing
    ``args.path_must_contain`` in its resolved path, when that is set) adds one
    ``<relative path>\\t<length>`` line to one of the two manifests.  The length
    is the frame count for ``flac`` / ``wav`` files and the number of rows read
    by ``numpy.loadtxt`` for anything else.  A file goes to the validation
    manifest with probability ``args.valid_percent``, drawn from a generator
    seeded with ``args.seed``.
    """
    assert 0 <= args.valid_percent <= 1.0

    dir_path = os.path.realpath(args.root)
    search_path = os.path.join(dir_path, "**/84-*." + args.ext)
    rand = random.Random(args.seed)

    train_path = os.path.join(args.dest, "train.tsv")
    valid_path = os.path.join(args.dest, "valid.tsv")
    with open(train_path, "w") as train_f, open(valid_path, "w") as valid_f:
        # Header line: the directory all listed paths are relative to.
        for manifest in (train_f, valid_f):
            print(dir_path, file=manifest)

        for fname in glob.iglob(search_path, recursive=True):
            file_path = os.path.realpath(fname)

            if args.path_must_contain and args.path_must_contain not in file_path:
                continue

            if fname.split('.')[-1] in ('flac', 'wav'):
                frames = soundfile.info(fname).frames
            else:
                # Anything else is treated as a text embedding, one row per frame.
                import numpy
                frames = len(numpy.loadtxt(fname))

            # Exactly one random draw per listed file, taken after the length
            # has been read.
            dest = train_f if rand.random() > args.valid_percent else valid_f
            rel_path = os.path.relpath(file_path, dir_path)
            print("{}\t{}".format(rel_path, frames), file=dest)
Exemplo n.º 32
0
def stream(path, block_length, frame_length, hop_length,
           mono=True, offset=0.0, duration=None, fill_value=None,
           dtype=np.float32):
    '''Stream audio in fixed-length buffers.

    This is primarily useful for processing large files that won't
    fit entirely in memory at once.

    Instead of loading the entire audio signal into memory (as
    in `load()`), this function produces *blocks* of audio spanning
    a fixed number of frames at a specified frame length and hop
    length.

    While this function strives for similar behavior to `load`,
    there are a few caveats that users should be aware of:

        1. This function does not return audio buffers directly.
           It returns a generator, which you can iterate over
           to produce blocks of audio.  A *block*, in this context,
           refers to a buffer of audio which spans a given number of
           (potentially overlapping) frames.
        2. Automatic sample-rate conversion is not supported.
           Audio will be streamed in its native sample rate,
           so no default values are provided for `frame_length`
           and `hop_length`.  It is recommended that you first
           get the sampling rate for the file in question, using
           `get_samplerate()`, and set these parameters accordingly.
        3. Many analyses require access to the entire signal
           to behave correctly, such as `resample`, `cqt`, or
           `beat_track`, so these methods will not be appropriate
           for streamed data.
        4. The `block_length` parameter specifies how many frames
           of audio will be produced per block.  Larger values will
           consume more memory, but will be more efficient to process
           down-stream.  The best value will ultimately depend on your
           application and other system constraints.
        5. By default, most librosa analyses (e.g., short-time Fourier
           transform) assume centered frames, which requires padding the
           signal at the beginning and end.  This will not work correctly
           when the signal is carved into blocks, because it would introduce
           padding in the middle of the signal.  To disable this feature,
           use `center=False` in all frame-based analyses.

    See the examples below for proper usage of this function.


    Parameters
    ----------
    path : string, int, or file-like object
        path to the input file to stream.

        Any codec supported by `soundfile` is permitted here.

    block_length : int > 0
        The number of frames to include in each block.

        Note that at the end of the file, there may not be enough
        data to fill an entire block, resulting in a shorter block
        by default.  To pad the signal out so that blocks are always
        full length, set `fill_value` (see below).

    frame_length : int > 0
        The number of samples per frame.

    hop_length : int > 0
        The number of samples to advance between frames.

        Note that when `hop_length < frame_length`, neighboring frames
        will overlap.  Similarly, the last frame of one *block* will overlap
        with the first frame of the next *block*.

    mono : bool
        Convert the signal to mono during streaming

    offset : float
        Start reading after this time (in seconds)

    duration : float
        Only load up to this much audio (in seconds)

    fill_value : float [optional]
        If padding the signal to produce constant-length blocks,
        this value will be used at the end of the signal.

        In most cases, `fill_value=0` (silence) is expected, but
        you may specify any value here.

    dtype : numeric type
        data type of audio buffers to be produced

    Yields
    ------
    y : np.ndarray
        An audio buffer of (at most)
        `(block_length - 1) * hop_length + frame_length` samples.

    Raises
    ------
    ParameterError
        if `block_length`, `frame_length`, or `hop_length` is not a
        positive integer.  Because this is a generator, the error is
        raised when iteration starts, not when `stream` is called.

    See Also
    --------
    load
    get_samplerate
    soundfile.blocks

    Examples
    --------
    Apply a short-term Fourier transform to blocks of 256 frames
    at a time.  Note that streaming operation requires left-aligned
    frames, so we must set `center=False` to avoid padding artifacts.

    >>> filename = librosa.util.example_audio_file()
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=4096,
    ...                         hop_length=1024)
    >>> for y_block in stream:
    ...     D_block = librosa.stft(y_block, center=False)

    Or compute a mel spectrogram over a stream, using a shorter frame
    and non-overlapping windows

    >>> filename = librosa.util.example_audio_file()
    >>> sr = librosa.get_samplerate(filename)
    >>> stream = librosa.stream(filename,
    ...                         block_length=256,
    ...                         frame_length=2048,
    ...                         hop_length=2048)
    >>> for y_block in stream:
    ...     m_block = librosa.feature.melspectrogram(y_block, sr=sr,
    ...                                              n_fft=2048,
    ...                                              hop_length=2048,
    ...                                              center=False)

    '''

    # Report the offending value: the templates used to be raised unformatted.
    if not (np.issubdtype(type(block_length), np.integer) and block_length > 0):
        raise ParameterError('block_length={} must be a positive '
                             'integer'.format(block_length))
    if not (np.issubdtype(type(frame_length), np.integer) and frame_length > 0):
        raise ParameterError('frame_length={} must be a positive '
                             'integer'.format(frame_length))
    if not (np.issubdtype(type(hop_length), np.integer) and hop_length > 0):
        raise ParameterError('hop_length={} must be a positive '
                             'integer'.format(hop_length))

    # Get the sample rate from the file info
    sr = sf.info(path).samplerate

    # Convert the requested time window from seconds to samples
    if offset:
        start = int(offset * sr)
    else:
        start = 0

    # frames=-1 asks soundfile to read through to the end of the file
    if duration:
        frames = int(duration * sr)
    else:
        frames = -1

    # Each block holds `block_length` frames; consecutive blocks share the
    # `frame_length - hop_length` samples that straddle the block boundary.
    blocks = sf.blocks(path,
                       blocksize=frame_length + (block_length - 1) * hop_length,
                       overlap=frame_length - hop_length,
                       fill_value=fill_value,
                       start=start,
                       frames=frames,
                       dtype=dtype,
                       always_2d=False)

    for block in blocks:
        if mono:
            yield to_mono(block.T)
        else:
            yield block.T
Exemplo n.º 33
0
def get_duration(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
                 center=True, filename=None):
    """Return the length, in seconds, of an audio file, an audio buffer,
    or a spectrogram-like feature matrix.

    Examples
    --------
    >>> # Load the example audio file
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> librosa.get_duration(y=y, sr=sr)
    61.45886621315193

    >>> # Or directly from an audio file
    >>> librosa.get_duration(filename=librosa.util.example_audio_file())
    61.4

    >>> # Or compute duration from an STFT matrix
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> S = librosa.stft(y)
    >>> librosa.get_duration(S=S, sr=sr)
    61.44

    >>> # Or a non-centered STFT matrix
    >>> S_left = librosa.stft(y, center=False)
    >>> librosa.get_duration(S=S_left, sr=sr)
    61.3471201814059

    Parameters
    ----------
    y : np.ndarray [shape=(n,), (2, n)] or None
        audio time series; the sample count is taken from the last axis

    sr : number > 0 [scalar]
        sampling rate used to convert a sample count into seconds

    S : np.ndarray [shape=(d, t)] or None
        STFT matrix, or any matrix derived from one (e.g., chromagram
        or mel spectrogram), with one column per frame.
        A duration obtained this way is only as precise as the frame
        resolution; pass the time series itself when precision matters.

    n_fft       : int > 0 [scalar]
        FFT window size that produced `S`

    hop_length  : int > 0 [ scalar]
        number of audio samples between successive columns of `S`

    center  : boolean
        - If `True`, `S[:, t]` is centered at `y[t * hop_length]`
        - If `False`, then `S[:, t]` begins at `y[t * hop_length]`

    filename : str
        If given, every other parameter is ignored and the duration is
        read from the audio file itself, without loading its contents
        into memory.  This makes it suitable for very long files.

        As in `load()`, an integer or an open file handle that
        `soundfile` can process is accepted as well.

    Returns
    -------
    d : float >= 0
        Duration (in seconds) of the input file, time series, or spectrogram.

    Raises
    ------
    ParameterError
        if none of `y`, `S`, or `filename` are provided.

    Notes
    -----
    Only one of `filename`, `(y, sr)`, or `S` should be supplied.  When
    several are given, `filename` is used first, then `y`, and `S` is only
    consulted when `y` is None.
    """

    # A file wins over everything else.  Ask soundfile first and fall back
    # to audioread when soundfile raises RuntimeError.
    if filename is not None:
        try:
            return sf.info(filename).duration
        except RuntimeError:
            with audioread.audio_open(filename) as fdesc:
                return fdesc.duration

    if y is not None:
        # Validate the audio buffer; multi-channel input is accepted.
        util.valid_audio(y, mono=False)
        n_samples = len(y) if y.ndim == 1 else y.shape[-1]
    elif S is not None:
        # One full window plus one hop for every additional frame.
        frame_count = S.shape[1]
        n_samples = n_fft + hop_length * (frame_count - 1)

        # Centered analysis pads half a window on each side; remove it again.
        if center:
            n_samples = n_samples - 2 * int(n_fft / 2)
    else:
        raise ParameterError('At least one of (y, sr), S, or filename must be provided')

    return float(n_samples) / sr