Example #1
def test_yin_multi(y_multi):
    y, sr = y_multi

    Pall = librosa.yin(y, fmin=30, fmax=300)
    P0 = librosa.yin(y[0], fmin=30, fmax=300)
    P1 = librosa.yin(y[1], fmin=30, fmax=300)

    assert np.allclose(Pall[0], P0)
    assert np.allclose(Pall[1], P1)

    assert not np.allclose(P0, P1)
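The test assumes a y_multi fixture that supplies a two-channel signal whose channels differ. A minimal sketch of such a fixture, assuming pure tones are acceptable (the 110/220 Hz values and use of librosa.tone are illustrative choices, not the suite's actual fixture):

import numpy as np
import librosa
import pytest

@pytest.fixture
def y_multi():
    # Two channels carrying different tones, so their pitch tracks differ.
    sr = 22050
    y = np.stack([librosa.tone(110, sr=sr, duration=1.0),
                  librosa.tone(220, sr=sr, duration=1.0)])
    return y, sr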
Example #2
    def compute(self, samples):
        # Track f0 on a mono mixdown of the incoming samples (C2-C5 range).
        self.F0 = librosa.yin(y=samples.mean(axis=1),
                              sr=self.samplerate,
                              frame_length=2048,
                              fmin=librosa.note_to_hz('C2'),
                              fmax=librosa.note_to_hz('C5'),
                              center=False)
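A hedged follow-up for plotting or logging: with center=False and yin's default hop of frame_length // 4, the k-th estimate corresponds to the frame starting at sample k * hop_length, so frame times can be recovered with librosa.frames_to_time (the 2048-sample frame length is taken from the call above):

times = librosa.frames_to_time(np.arange(len(self.F0)),
                               sr=self.samplerate,
                               hop_length=2048 // 4)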
Example #3
    def analyze(cls, events: List[CorpusEvent],
                metadata: Metadata) -> List[CorpusEvent]:
        if not FeatureUtils.is_valid_audio(events, metadata):
            raise FeatureError(
                f"Feature '{cls.__name__}' does not support content of "
                f"type {metadata.content_type.__class__.__name__}")

        metadata: AudioMetadata = typing.cast(AudioMetadata, metadata)
        # TODO: pass these bounds in rather than hard-coding them
        yin_frames: np.ndarray = librosa.yin(metadata.foreground_data,
                                             fmin=50,
                                             fmax=4186,
                                             sr=metadata.sr,
                                             frame_length=2048,
                                             hop_length=metadata.hop_length)
        # 12 * log2(f / 8.1758 Hz) is the standard Hz-to-MIDI mapping
        # (8.175798915643707 Hz is the frequency of MIDI note 0).
        yin_midipitches: np.ndarray = np.round(
            12 * np.log2(yin_frames / 8.175798915643707))
        for event in events:
            onset_frame: int = librosa.time_to_frames(
                event.onset, sr=metadata.sr, hop_length=metadata.hop_length)
            end_frame: int = librosa.time_to_frames(
                event.onset + event.duration,
                sr=metadata.sr,
                hop_length=metadata.hop_length)
            hist, _ = np.histogram(yin_midipitches[onset_frame:end_frame],
                                   bins=128,
                                   range=(0, 128))
            pitch: int = int(np.argmax(hist))
            event.set_feature(cls(value=pitch))

        return events
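The magic constant 8.175798915643707 Hz is the frequency of MIDI note 0, so the expression above is the standard Hz-to-MIDI mapping. librosa ships the same conversion as librosa.hz_to_midi; a quick check (the test values are illustrative):

import numpy as np
import librosa

f0 = np.array([261.63, 440.0])  # ~C4 and A4
manual = np.round(12 * np.log2(f0 / 8.175798915643707))
assert np.allclose(manual, np.round(librosa.hz_to_midi(f0)))  # [60, 69]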
Example #4
def calculate_f0(file_path, fmin=65, fmax=2093, sample_rate=44100, frame_length=2048, win_length=735 * 2,
                 hop_length=735, threshold=0.1, seuil_voice=250):
    """
    We calculated the fundamental frequency by using the algorithm yin and all the f0 higher than seuil_voice will be
    considered as unvoiced and be reset to 0

    :param seuil_voice:
    :param threshold:
    :param file_path: the path of the wav file .wav
    :param fmin:
    :param fmax:
    :param sample_rate: the sample rate of the wav file
    :param frame_length:
    :param win_length:
    :param hop_length:
    :return: a numpy array which contains the f0 at every moment
    """

    wav_file, _ = librosa.load(file_path, sr=sample_rate)
    result_f0 = librosa.yin(wav_file, fmin=fmin, fmax=fmax, sr=sample_rate, frame_length=frame_length,
                            win_length=win_length, hop_length=hop_length, trough_threshold=threshold)
    # choose a threshold to distinguish voiced and unvoiced part of the waveform
    # for the unvoiced parts, we consider the F0 = 0
    result_f0[result_f0 >= seuil_voice] = 0
    # initialize a vector to save the voiced unvoiced flags
    uv_flags = np.zeros(result_f0.shape)
    uv_flags[result_f0 > 0] = 1
    return result_f0, uv_flags
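A hedged usage sketch (assumes an 'example.wav' file exists; the default hop of 735 samples gives one frame every 1/60 s at 44.1 kHz):

f0, uv = calculate_f0('example.wav')
times = librosa.frames_to_time(np.arange(len(f0)), sr=44100, hop_length=735)
voiced_ratio = uv.mean()  # fraction of frames judged voiced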
Example #5
    def __call__(self, data):
        signal = data['signal']
        sr = data['sample_rate']

        self.n_fft = int(np.ceil(0.025 * sr))
        self.win_length = int(np.ceil(0.025 * sr))
        self.hop_length = int(np.ceil(0.01 * sr))

        audio_mfcc = librosa.feature.mfcc(y=signal.numpy().reshape(-1),
                                          sr=sr,
                                          n_mfcc=13,
                                          n_fft=self.n_fft,
                                          hop_length=self.hop_length)

        #f0, voiced_flag, voiced_probs = librosa.pyin(y=signal.numpy().reshape(-1),
        #                                     sr=sr,
        #                                     frame_length = self.n_fft,
        #                                     hop_length = self.hop_length,
        #                                     fmin=librosa.note_to_hz('C2'),
        #                                     fmax=librosa.note_to_hz('C7'))

        f0 = librosa.yin(y=signal.numpy().reshape(-1),
                         sr=sr,
                         frame_length=self.n_fft,
                         hop_length=self.hop_length,
                         fmin=librosa.note_to_hz('C2'),
                         fmax=librosa.note_to_hz('C7'))
        f0[f0 > 500] = 0  # crude voicing gate: treat very high estimates as unvoiced

        f0[np.isnan(f0)] = 0  # defensive; relevant mainly if pyin (above) is used instead

        delta1 = librosa.feature.delta(audio_mfcc)
        #delta1 = delta1 / np.linalg.norm(delta1)
        delta2 = librosa.feature.delta(audio_mfcc, order=2)
        #delta2 = delta2 / np.linalg.norm(delta2)
        mfcc_result = np.concatenate(
            (audio_mfcc.transpose(), delta1.transpose(), delta2.transpose(),
             f0.reshape((-1, 1))),  # f0 is 1-D, so transposing it is a no-op
            axis=1)

        mfcc_result = torch.from_numpy(mfcc_result)
        mfcc_result = mfcc_pad2(mfcc_result)
        #f0 = f0 / np.linalg.norm(f0)

        data['MFCC'] = mfcc_result
        data['input'] = mfcc_result

        return data
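The column-wise concatenation works because the MFCC and YIN calls share hop_length, both keep the default center=True, and yin's frame_length equals the MFCC n_fft, so both produce the same number of frames. A sanity check one could add (assuming those defaults hold):

# Same hop and same centered framing => same frame count on both features.
assert audio_mfcc.shape[1] == f0.shape[0]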
Example #6
    def __call__(self, data):
        signal = data['signal']
        sr = data['sample_rate']
        self.n_fft = int(np.ceil(0.025 * sr))
        self.win_length = int(np.ceil(0.025 * sr))
        self.hop_length = int(np.ceil(0.01 * sr))
        audio_mfcc = torch.FloatTensor(
            librosa.feature.mfcc(y=signal.numpy().reshape(-1),
                                 sr=sr,
                                 n_mfcc=13,
                                 n_fft=self.n_fft,
                                 hop_length=self.hop_length))
        #f0, voiced_flag, voiced_probs = librosa.pyin(y=signal.numpy().reshape(-1),
        #                                             sr=sr,
        #                                             frame_length=n_fft,
        #                                             hop_length=hop_length,
        #                                             fmin=librosa.note_to_hz('C2'),
        #                                             fmax=librosa.note_to_hz('C7'))
        f0 = torch.FloatTensor(
            librosa.yin(y=signal.numpy().reshape(-1),
                        sr=sr,
                        frame_length=self.n_fft,
                        hop_length=self.hop_length,
                        fmin=librosa.note_to_hz('C2'),
                        fmax=librosa.note_to_hz('C7')))
        f0[f0 > 500] = 0  # crude voicing gate: treat very high estimates as unvoiced

        f0[torch.isnan(f0)] = 0  # defensive; relevant mainly if pyin is used instead
        delta1 = torch.FloatTensor(librosa.feature.delta(audio_mfcc))
        delta2 = torch.FloatTensor(librosa.feature.delta(audio_mfcc, order=2))
        # mfcc_result = np.concatenate((audio_mfcc, delta1, delta2), axis=0)
        f0_delta1 = torch.FloatTensor(librosa.feature.delta(f0)).view(1, -1)
        f0_delta2 = torch.FloatTensor(librosa.feature.delta(f0, order=2)).view(
            1, -1)

        # Append the f0 row to each feature block, staying in torch throughout.
        audio_mfcc = torch.cat((audio_mfcc, f0.view(1, -1)))
        delta1 = torch.cat((delta1, f0_delta1))
        delta2 = torch.cat((delta2, f0_delta2))

        mfcc_result = torch.stack([audio_mfcc, delta1, delta2])
        mfcc_result = mfcc_pad1(mfcc_result)

        data['MFCC'] = mfcc_result
        data['input'] = mfcc_result

        return data
Example #7
def calculate_f0_without_threshold(file_path, fmin=65, fmax=2093, sample_rate=44100, frame_length=2048,
                                   win_length=735 * 2,
                                   hop_length=735, threshold=0.1):
    """
    calculation of the fundamental frequency without the threshold
    :param file_path:
    :param fmin:
    :param fmax:
    :param sample_rate:
    :param frame_length:
    :param win_length:
    :param hop_length:
    :param threshold:
    :return:
    """
    wav_file, _ = librosa.load(file_path, sr=sample_rate)
    result_f0 = librosa.yin(wav_file, fmin=fmin, fmax=fmax, sr=sample_rate, frame_length=frame_length,
                            win_length=win_length, hop_length=hop_length, trough_threshold=threshold)
    return result_f0
Example #8
import sounddevice as sd
from scipy.io.wavfile import write
import librosa

fs = 44100   # sample rate
seconds = 4  # duration of recording

print('start recording')
myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
sd.wait()  # wait until recording is finished
print('finished recording')
write('output.wav', fs, myrecording)  # save as wav file
y, sr = librosa.load('output.wav')
f0 = librosa.yin(y, fmin=310, fmax=400)  # per-frame pitch estimates

# Guitar strings are E2=82.41Hz, A2=110Hz, D3=146.8Hz, G3=196Hz, B3=246.9Hz, E4=329.6Hz
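With fmin=310 and fmax=400, the search range only brackets the high E string (E4 = 329.6 Hz). A hedged tuner-style sketch covering all six strings from the table above (the 60-400 Hz range and median aggregation are illustrative choices):

import numpy as np

strings = {'E2': 82.41, 'A2': 110.0, 'D3': 146.8,
           'G3': 196.0, 'B3': 246.9, 'E4': 329.6}
f0 = librosa.yin(y, fmin=60, fmax=400, sr=sr)
est = float(np.median(f0))  # median resists onset/octave glitches
name, target = min(strings.items(), key=lambda kv: abs(kv[1] - est))
print(f'closest string: {name}, off by {est - target:+.1f} Hz')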
Example #9
y = normalize_signal(y)

f_min = 80  #Hz
f_max = 1300  #Hz
taus = np.arange(start=sr // f_max, stop=sr // f_min)

coinv_window = int(taus[-1] + 1)
n_windows = len(y) // coinv_window - 1

pitches1 = step1(y)
pitches2 = step2(y)
pitches3 = step3(y)
pitches4 = step4(y)
pitches5 = step5(y)
pitches6 = step6(y)
librosa_pitches = librosa.yin(y, fmin=100, fmax=1300, sr=sr, win_length=coinv_window)
fig, axs = plt.subplots(3, 2)
custom_xlim = 0, len(y) / sr
custom_ylim = 0, max(pitches5) * 1.25
axs[0, 0].plot(np.linspace(0, len(y) / sr, num=n_windows), pitches1)
axs[0, 0].hlines(440,
                 -0.1 * len(y) / sr,
                 len(y) / sr * 1.1,
                 colors='r',
                 linestyles='--')
axs[0, 0].set_title('Step 1')
axs[0, 1].plot(np.linspace(0, len(y) / sr, num=n_windows), pitches2)
axs[0, 1].hlines(440,
                 -0.1 * len(y) / sr,
                 len(y) / sr * 1.1,
                 colors='r',
                 linestyles='--')
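For reference, worked numbers for the lag range defined by taus above:

sr = 22050                  # assumed sample rate (not shown in the excerpt)
f_min, f_max = 80, 1300
tau_min, tau_max = sr // f_max, sr // f_min
print(tau_min, tau_max)     # 16, 275: candidate lags for 80-1300 Hz pitches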
Example #10
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        args, _ = parser.parse_known_args()

        if args.samplerate is None:
            self.samplerate = \
                int(sd.query_devices(args.input_device)['default_samplerate'])
        else:
            self.samplerate = int(args.samplerate)
        print(f"INFO -- Sampling rate at {self.samplerate} Hz")

        self.threadpool = QtCore.QThreadPool()

        self.q = queue.Queue()

        self.setFixedSize(args.width, args.height)
        self.mainbox = QtWidgets.QWidget()
        self.setCentralWidget(self.mainbox)
        self.layout = QtWidgets.QGridLayout()
        self.mainbox.setLayout(self.layout)

        # Widgets
        self.spec_plot = SpectrogramWidget()
        self.wave_plot = WaveFormWidget()

        for i, widget in enumerate([self.spec_plot, self.wave_plot]):
            self.layout.addWidget(widget, i, 0)

        # Initialize x and y
        self.length = self.samplerate * args.duration
        self.y = np.random.rand(self.length, len(args.channels))
        self.x = np.linspace(0, args.duration, num=self.length)

        self.zcr = librosa.feature.zero_crossing_rate(self.y.mean(axis=1))[0]

        # Wave Plot
        self.waveline_1 = self.wave_plot.plot(x=self.x,
                                              y=self.y[:, 0],
                                              pen=pg.mkPen('g', width=0.5),
                                              name='channel_1')
        self.waveline_2 = self.wave_plot.plot(x=self.x,
                                              y=self.y[:, 1],
                                              pen=pg.mkPen('y', width=0.5),
                                              name='channel_2')
        self.waveline_3 = self.wave_plot.plot(x=np.linspace(
            0, args.duration, self.zcr.shape[0]),
                                              y=self.zcr,
                                              pen=pg.mkPen('r', width=2),
                                              name='zcr')

        # Spectrogram
        self.fmax = int(
            librosa.fft_frequencies(sr=self.samplerate,
                                    n_fft=args.n_fft)[-1])
        D = librosa.stft(y=self.y.mean(axis=1), n_fft=args.n_fft, center=False)
        self.specdata = librosa.amplitude_to_db(np.abs(D), ref=np.max)

        # M = librosa.feature.melspectrogram(
        #             y=self.y.mean(axis=1),
        #             sr=self.samplerate,
        #             n_fft=args.n_fft,
        #             n_mels=args.n_mels)
        # self.specdata = librosa.power_to_db(S=M, ref=np.max)

        self.F0 = librosa.yin(y=self.y.mean(axis=1),
                              sr=self.samplerate,
                              frame_length=2048,
                              fmin=librosa.note_to_hz('C2'),
                              fmax=librosa.note_to_hz('C5'),
                              center=False)
        self.spec_image = pg.ImageItem(item=self.specdata.T)
        self.spec_plot.addItem(item=self.spec_image)
        self.f0_line = self.spec_plot.plot(x=np.linspace(
            0, args.duration, self.F0.shape[0]),
                                           y=self.F0,
                                           pen=pg.mkPen('r', width=2),
                                           name='f0')
        self.bar = pg.ColorBarItem(values=(librosa.note_to_hz('C2'),
                                           librosa.note_to_hz('C5')),
                                   cmap=pg.colormap.get('CET-L9'))
        self.bar.setImageItem(self.spec_image)

        # Start audio stream and animations
        self.start_stream()
        if args.input_device == 'Virtual Input (VB-Audio Virtual Cable), Windows DirectSound':
            self.play_media(media_url=args.media_url,
                            type='stream',
                            volume=100)
        self.animate()
        self.show()
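The np.linspace(0, duration, F0.shape[0]) axis used for the f0 line can also be obtained from librosa directly; a hedged equivalent, given yin's default hop of frame_length // 4:

t = librosa.times_like(self.F0, sr=self.samplerate, hop_length=2048 // 4)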
Example #11
file_names = ['audio_files/ah/ah_0.wav',
              'audio_files/oh/oh_0.wav',
              'audio_files/eeh/eeh_0.wav',
              'audio_files/ah/ah_1.wav',
              'audio_files/oh/oh_1.wav',
              'audio_files/eeh/eeh_1.wav',
              'audio_files/ah/ah_2.wav',
              'audio_files/oh/oh_2.wav',
              'audio_files/eeh/eeh_2.wav']
synthesized = np.zeros(0)
recorded = np.zeros(0)


for i, file_name in enumerate(file_names):
    y, sr = librosa.load(file_name, sr=44100)
    
    pitch = librosa.yin(y, fmin=50, fmax=1000, sr=sr)
    fr = np.mean(pitch)
    
    n_fft = 8096
    hop_size = 256
    
    p = librosa.stft(y, n_fft=n_fft, hop_length=hop_size)
    d = librosa.amplitude_to_db(np.abs(p), ref=np.max)
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    
    m = np.mean(d, axis=1)
    m -= np.min(m)
    
    def moving_average(x, w):
        # pad the edges so the smoothed output keeps the input's length
        x = np.hstack((x[0] * np.ones(w // 2), x, x[-1] * np.ones(w // 2 - 1)))
        ma = np.convolve(x, np.ones(w), 'valid') / w
        return ma
Example #12
def f0(wave_form, sample_rate, hop_length, fmin=80, fmax=10000):
    # One f0 estimate per frame, returned as an (n_frames, 1) column.
    return yin(wave_form, fmin=fmin, fmax=fmax, sr=sample_rate,
               hop_length=hop_length).reshape(-1, 1)
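The trailing reshape(-1, 1) yields one column per frame so the result can be stacked horizontally with other (n_frames, k) feature matrices. A hedged usage sketch (the bundled example audio is an arbitrary stand-in):

y, sr = librosa.load(librosa.example('trumpet'))
col = f0(y, sr, hop_length=512)
print(col.shape)  # (n_frames, 1); ready for np.hstack with other features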
Example #13
def pitch(samples: Samples) -> float:
    freqs = librosa.yin(samples.data, fmin=65, fmax=2000, sr=samples.rate)
    return float(np.mean(freqs))
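YIN reports some frequency for every frame, voiced or not, so the mean over all frames is easily skewed by unvoiced regions and octave errors. A hedged variant using the median instead (same assumed Samples type as above):

def pitch_median(samples: Samples) -> float:
    # The median is less sensitive to spurious estimates on unvoiced frames.
    freqs = librosa.yin(samples.data, fmin=65, fmax=2000, sr=samples.rate)
    return float(np.median(freqs))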
Example #14
    def get_f0(self):
        f0 = lr.yin(self.samples, fmin=65, fmax=2093, sr=SAMPLE_RATE)
        return np.mean(f0)