def extract_f0(waveform, fs=16000, f0_min=30, f0_max=550, harmonics=10., voicing=50., configuration="pitch_tracker"):
    """Extract an F0 contour from a waveform.

    When either limit is given as 0, a first estimate is computed with the
    limits exactly as passed; the mean and standard deviation of its
    positive (voiced) values are then used to derive the search range for
    the final estimate.

    Args:
        waveform: signal handed unchanged to the selected tracker.
        fs: sampling rate passed to the tracker.
        f0_min: lower search limit; 0 requests automatic range estimation.
        f0_max: upper search limit; 0 requests automatic range estimation.
        harmonics: forwarded to ``pitch_tracker.inst_freq_pitch``.
        voicing: forwarded to ``pitch_tracker.inst_freq_pitch``.
        configuration: ``"REAPER"`` selects pyreaper (only when the
            module-level ``USE_REAPER`` flag is truthy); any other value
            uses ``pitch_tracker``.

    Returns:
        The F0 contour produced by the selected tracker. If automatic range
        estimation finds no positive F0 value, the first-pass contour is
        returned as is.
    """
    use_reaper = USE_REAPER and (configuration == "REAPER")

    def _track(lower, upper):
        # Single dispatch point for both tracker back ends.
        if use_reaper:
            _, _, _, contour, _ = pyreaper.reaper(waveform, fs, lower, upper)
        else:
            # The trailing positional arguments (False, 200) are kept
            # exactly as in the original call.
            contour, _ = pitch_tracker.inst_freq_pitch(
                waveform, fs, lower, upper, harmonics, voicing, False, 200)
        return contour

    # First determine f0 with the limits as given, then use the mean and std
    # of that first estimate to limit the search range.
    if (f0_min == 0) or (f0_max == 0):
        f0 = _track(f0_min, f0_max)
        voiced = f0[f0 > 0]

        if voiced.size == 0:
            # Mean/std of an empty selection are NaN, which would turn both
            # limits into NaN for the second pass. Nothing was detected, so
            # hand back the first estimate instead.
            logger.debug("no voiced frames found; skipping range refinement")
            return f0

        mean_f0 = np.mean(voiced)
        std_f0 = np.std(voiced)
        f0_min = max(mean_f0 - 3 * std_f0, 40.0)
        f0_max = mean_f0 + 6 * std_f0

        logger.debug("f0_min = %f, f0_max = %f", f0_min, f0_max)

    return _track(f0_min, f0_max)
def reaper(self):
    """Run REAPER on this object's audio and return the F0 track.

    Unvoiced frames in the returned contour are represented by -1.0.

    Returns:
        (np.ndarray[n_frames]): fundamental frequency.
    """
    analysis = pyreaper.reaper(self.data, self.sample_rate)
    # REAPER yields (pm_times, pm, f0_times, f0, corr); only f0 is needed.
    _, _, _, contour, _ = analysis
    return contour
def extract_f0(y, sampling_rate, f0_floor, f0_ceil, frame_shift_ms, use_reaper=True):
    """Estimate F0 with REAPER (default) or SPTK's RAPT.

    Args:
        y: input samples; passed as is to REAPER, cast to float32 for RAPT.
        sampling_rate: sampling rate handed to the tracker.
        f0_floor: lower F0 search bound.
        f0_ceil: upper F0 search bound.
        frame_shift_ms: frame shift in milliseconds.
        use_reaper: choose pyreaper when true, ``pysptk.sptk.rapt`` otherwise.

    Returns:
        The F0 contour as a float32 array. On the REAPER path, frames
        reported as -1 are rewritten to 0.
    """
    if not use_reaper:
        samples = y.astype(np.float32)
        # RAPT takes the hop in samples rather than seconds.
        hop = int(frame_shift_ms / 1000.0 * sampling_rate)
        contour = pysptk.sptk.rapt(
            samples, fs=sampling_rate, min=f0_floor, max=f0_ceil,
            hopsize=hop, voice_bias=0.3)
        return contour.astype(np.float32)

    _, _, _, contour, _ = pyreaper.reaper(
        y, fs=sampling_rate, minf0=f0_floor, maxf0=f0_ceil,
        frame_period=frame_shift_ms / 1000.0)
    contour[contour == -1.] = 0.
    return contour.astype(np.float32)
def voice_analysis():
    """Analyse ``../wav/<name>.wav`` with REAPER and store the last pitch-mark time.

    The base file name is read from ``sys.argv[1]``; any further arguments
    are treated as options. The time of the final pitch mark is written to
    ``../time_arr/<name>_time.npy`` and ``../time_arr/<name>_time.txt``.
    With the ``-debug`` option a 2x2 figure of the raw samples, pitch marks,
    F0 contour and correlations is shown.

    Returns:
        Tuple ``(row_data, f0, f0_times, _time)``.
    """
    # Command line: <name> [options ...]
    name = sys.argv[1]
    wav_path = "../wav/" + name + ".wav"
    flags = sys.argv[2:]

    # Analysis
    fs, row_data = wavfile.read(wav_path)
    pm_times, pm, f0_times, f0, corr = pyreaper.reaper(row_data, fs)
    _time = pm_times[-1]

    # Persist the time of the last pitch mark in binary and text form.
    out_stem = "../time_arr/" + name + "_time"
    np.save(out_stem + ".npy", np.array([_time]))
    # NOTE(review): fmt='%d' writes the value as an integer - confirm intended.
    np.savetxt(out_stem + ".txt", np.array([_time]), fmt='%d', delimiter=',')

    # Optional diagnostic plots.
    if "-debug" in flags:
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))

        # Raw samples (top-left).
        axes[0, 0].plot(row_data, label="Row_data")
        axes[0, 0].legend(fontsize=10)

        # Remaining panels: pitch marks, F0 contour, correlations.
        panels = (
            (axes[0, 1], pm_times, pm, "red", "Pitch mark"),
            (axes[1, 0], f0_times, f0, "green", "F0 contour"),
            (axes[1, 1], f0_times, corr, "blue", "Correlations"),
        )
        for axis, xs, ys, colour, caption in panels:
            axis.plot(xs, ys, linewidth=3, color=colour, label=caption)
            axis.legend(fontsize=10)

        plt.show()

    return row_data, f0, f0_times, _time
def pyreaper_pitch(wavdata_int, fs, frame_shift, max_pitch, min_pitch, high_pass, hilbert_transform, inter_mark):
    """Return F0 vector estimated by pyreaper (Python package) along with
    corresponding times

    Args:
        wavdata_int       - 16-bit integer data from WAV file [NumPy array]
        fs                - Sampling frequency in Hz [integer]
        frame_shift       - Length of each frame in ms [integer]
        max_pitch         - Maximum valid F0 allowed in Hz [integer]
        min_pitch         - Minimum valid F0 allowed in Hz [integer]
        high_pass         - Whether to apply high pass filter (80 Hz) to
                            input [Boolean]
        hilbert_transform - Whether to apply Hilbert transform that may
                            reduce phase distortion [Boolean]
        inter_mark        - Regular inter-mark interval to use in unvoiced
                            pitchmark regions given in milliseconds [integer]

    Returns:
        F0_times - Times corresponding to F0 estimates [NumPy vector]
        F0       - F0 estimates, negative values replaced by NaN
                   [NumPy vector]

    Raises:
        ImportError - if the pyreaper package cannot be imported. The hint
                      is still printed, but the error now propagates instead
                      of falling through to an UnboundLocalError on F0.
    """
    # Only the import is guarded, so failures inside reaper() itself are not
    # mistaken for a missing package.
    try:
        from pyreaper import reaper
    except ImportError:  # pragma: no cover
        print("Need Python library pyreaper. Is it installed?")
        raise

    # frame_shift and inter_mark arrive in milliseconds; reaper is given seconds.
    pm_times, pm, F0_times, F0, corr = reaper(
        wavdata_int, fs,
        minf0=min_pitch, maxf0=max_pitch,
        do_high_pass=high_pass,
        do_hilbert_transform=hilbert_transform,
        inter_pulse=inter_mark / 1000.0,
        frame_period=frame_shift / 1000.0)

    # Replace invalid measurements with NaN
    F0[F0 < 0] = np.nan
    return F0_times, F0
def test_reaper():
    """Smoke test: read the bundled 16 kHz wav and run reaper on it ten times."""
    wav_path = join(dirname(__file__), "test16k.wav")
    repeats = 10
    for _ in tqdm(range(repeats)):
        rate, samples = wavfile.read(wav_path)
        # Unpacking doubles as a check that five values come back.
        mark_times, marks, contour_times, contour, scores = reaper(samples, rate)
def get_pitch(fs, x):
    """Return the most frequently occurring value in REAPER's F0 contour.

    Args:
        fs: sampling rate passed to ``pyreaper.reaper``.
        x: samples passed to ``pyreaper.reaper``.

    Returns:
        The F0 value with the highest frame count; on ties the smallest such
        value, since ``np.unique`` returns sorted values and ``argmax`` picks
        the first maximum.
    """
    _, _, _, contour, _ = pyreaper.reaper(x, fs)
    # NOTE(review): every frame value is counted with no filtering, so any
    # marker value used for unvoiced frames can win - confirm intended.
    distinct, tally = np.unique(contour, return_counts=True)
    winner = tally.argmax()
    return distinct[winner]
def F0(data, rate):
    """Return REAPER's F0 contour for ``data`` sampled at ``rate``.

    The search range and frame period come from the module-level ``args``
    object (presumably parsed command-line options - verify against the
    caller): the smallest and largest entries of ``args.f0_range`` bound the
    search, and ``args.f0_period`` is divided by 1000 before being passed on.
    """
    lowest = min(args.f0_range)
    highest = max(args.f0_range)
    period = args.f0_period / 1000
    _, _, _, contour, _ = pyreaper.reaper(
        data, rate, minf0=lowest, maxf0=highest, frame_period=period)
    return contour
def basic_analysis(wav, sample_rate):
    """Run REAPER on ``wav`` and return its F0 contour as a single column.

    Returns:
        The F0 values reshaped to ``(-1, 1)``, i.e. one value per row.
    """
    _, _, _, contour, _ = pyreaper.reaper(wav, sample_rate)
    column = contour.reshape(-1, 1)
    return column
def wav2world(
        wave, fs,
        mcep_order=25, f0_smoothing=0, ap_smoothing=0, mcep_smoothing=0,
        frame_period=None, f0_floor=None, f0_ceil=None, f0_mode="harvest"):
    """Analyse a waveform into WORLD features.

    Args:
        wave: input samples; cast to float64 before analysis.
        fs: sampling rate.
        mcep_order: order passed to ``pysptk.mcep``.
        f0_smoothing: cut-off for ``modspec_smoothing`` of the continuous
            log-F0; values <= 0 disable smoothing.
        ap_smoothing: same, for the coded aperiodicity.
        mcep_smoothing: same, for the mel-cepstrum.
        frame_period: frame period in ms; ``None`` uses
            ``pyworld.default_frame_period``.
        f0_floor: lower F0 bound; ``None`` uses ``pyworld.default_f0_floor``.
        f0_ceil: upper F0 bound; ``None`` uses ``pyworld.default_f0_ceil``.
        f0_mode: F0 estimator, one of ``"harvest"``, ``"reaper"``, ``"dio"``.

    Returns:
        Tuple ``(mcep, clf0, vuv, cap, sp, fbin, t)``: mel-cepstrum,
        continuous log-F0, integer voiced/unvoiced flags, coded
        aperiodicity, spectral envelope from ``pyworld.cheaptrick``,
        ``sp.shape[1]``, and the frame times from the F0 estimator.

    Raises:
        ValueError: if ``f0_mode`` is not one of the supported values.
    """
    # setup default values
    wave = wave.astype('float64')

    if frame_period is None:
        frame_period = pyworld.default_frame_period
    if f0_floor is None:
        f0_floor = pyworld.default_f0_floor
    if f0_ceil is None:
        f0_ceil = pyworld.default_f0_ceil
    alpha = pysptk.util.mcepalpha(fs)

    # f0 (each mode also selects the threshold later given to d4c)
    if f0_mode == "harvest":
        f0, t = pyworld.harvest(
            wave, fs,
            f0_floor=f0_floor, f0_ceil=f0_ceil, frame_period=frame_period)
        threshold = 0.85

    elif f0_mode == "reaper":
        # reaper is fed int16 samples and a frame period in seconds.
        _, _, t, f0, _ = reaper(
            (wave * (2**15 - 1)).astype("int16"),
            fs, frame_period=frame_period / 1000,
            do_hilbert_transform=True)
        t, f0 = t.astype('float64'), f0.astype('float64')
        threshold = 0.1

    elif f0_mode == "dio":
        # Forward the caller's analysis settings: the smoothing rate
        # (1000 / frame_period) and the clipping range below assume the
        # contour was estimated with these same values.
        _f0, t = pyworld.dio(
            wave, fs,
            f0_floor=f0_floor, f0_ceil=f0_ceil, frame_period=frame_period)
        f0 = pyworld.stonemask(wave, _f0, t, fs)
        threshold = 0.0

    else:
        raise ValueError(
            "unsupported f0_mode: {!r} "
            "(expected 'harvest', 'reaper' or 'dio')".format(f0_mode))

    # world
    sp = pyworld.cheaptrick(wave, f0, t, fs)
    ap = pyworld.d4c(wave, f0, t, fs, threshold=threshold)

    # extract vuv from ap: voiced where the first aperiodicity column is
    # below 0.5 and f0 is above 1.0
    vuv_flag = (ap[:, 0] < 0.5) * (f0 > 1.0)
    # Snapshot taken before the end points are forced to voiced below.
    vuv = vuv_flag.astype('int')

    # continuous log f0
    clf0 = np.zeros_like(f0)
    if vuv_flag.any():
        # Anchor both ends on the nearest voiced value so the interpolation
        # below covers every unvoiced frame.
        if not vuv_flag[0]:
            f0[0] = f0[vuv_flag][0]
            vuv_flag[0] = True
        if not vuv_flag[-1]:
            f0[-1] = f0[vuv_flag][-1]
            vuv_flag[-1] = True

        idx = np.arange(len(f0))
        clf0[idx[vuv_flag]] = np.log(
            np.clip(f0[idx[vuv_flag]], f0_floor / 2, f0_ceil * 2))
        # Fill unvoiced frames by interpolating between voiced ones.
        clf0[idx[~vuv_flag]] = interp1d(
            idx[vuv_flag], clf0[idx[vuv_flag]]
        )(idx[~vuv_flag])

        if f0_smoothing > 0:
            clf0 = modspec_smoothing(
                clf0, 1000 / frame_period, cut_off=f0_smoothing)

    else:
        # NOTE(review): this fallback stores f0_floor itself, not its log,
        # unlike the voiced branch above - confirm intended.
        clf0 = np.ones_like(f0) * f0_floor

    # continuous coded ap
    cap = pyworld.code_aperiodicity(ap, fs)
    if ap_smoothing > 0:
        cap = modspec_smoothing(cap, 1000 / frame_period, cut_off=ap_smoothing)

    # mcep
    mcep = pysptk.mcep(sp, order=mcep_order, alpha=alpha, itype=4)
    if mcep_smoothing > 0:
        mcep = modspec_smoothing(
            mcep, 1000 / frame_period, cut_off=mcep_smoothing)

    fbin = sp.shape[1]
    return mcep, clf0, vuv, cap, sp, fbin, t