示例#1
0
def extract_f0(waveform, fs=16000, f0_min=30, f0_max=550, harmonics=10., voicing=50., configuration="pitch_tracker"):
    """Extract an F0 track from a waveform.

    When either ``f0_min`` or ``f0_max`` is 0, a first pass is run with the
    limits exactly as given; the mean and standard deviation of its positive
    F0 values are then used to derive the search range for a second pass.

    Args:
        waveform: input signal, handed to the tracker unchanged.
        fs: sampling rate handed to the tracker.
        f0_min: lower F0 search limit; 0 requests automatic range estimation.
        f0_max: upper F0 search limit; 0 requests automatic range estimation.
        harmonics: forwarded to ``pitch_tracker.inst_freq_pitch``.
        voicing: forwarded to ``pitch_tracker.inst_freq_pitch``.
        configuration: ``"REAPER"`` selects ``pyreaper.reaper`` when the
            module-level ``USE_REAPER`` flag is truthy; any other value uses
            ``pitch_tracker.inst_freq_pitch``.

    Returns:
        The F0 track returned by the selected tracker. If automatic range
        estimation finds no positive F0 value, the first-pass track is
        returned as is.
    """
    def _track(lower, upper):
        # Single dispatch point so both passes use the same tracker settings.
        if USE_REAPER and (configuration == "REAPER"):
            _, _, _, track, _ = pyreaper.reaper(waveform, fs, lower, upper)
        else:
            (track, _) = pitch_tracker.inst_freq_pitch(waveform, fs, lower, upper, harmonics, voicing, False, 200)
        return track

    if (f0_min == 0) or (f0_max == 0):
        f0 = _track(f0_min, f0_max)

        voiced = f0[f0 > 0]
        if len(voiced) == 0:
            # Without any positive F0 value the statistics below would be NaN
            # and the second pass would be run with NaN limits.
            logger.debug("no voiced frames found; keeping first-pass f0")
            return f0

        mean_f0 = np.mean(voiced)
        std_f0 = np.std(voiced)
        # The lower bound is never allowed to drop below 40.0.
        f0_min = max((mean_f0 - 3*std_f0, 40.0))
        f0_max = mean_f0 + 6*std_f0

        logger.debug("f0_min = %f, f0_max = %f", f0_min, f0_max)

    return _track(f0_min, f0_max)
    def reaper(self):
        """Run REAPER on this object's audio and return only the F0 track.

        Note VUV in F0 is represented using -1.0

        Returns:
            (np.ndarray[n_frames]): fundamental frequency.
        """
        # pyreaper returns (pm_times, pm, f0_times, f0, corr); only f0 is kept.
        analysis = pyreaper.reaper(self.data, self.sample_rate)
        _pm_times, _pm, _f0_times, f0_track, _corr = analysis
        return f0_track
示例#3
0
def extract_f0(y, sampling_rate, f0_floor, f0_ceil, frame_shift_ms, use_reaper=True):
    """Estimate F0 with REAPER (default) or with SPTK's RAPT.

    Args:
        y: input signal; cast to float32 before being given to RAPT, passed
            unchanged to REAPER.
        sampling_rate: sampling rate handed to the estimator.
        f0_floor: lower F0 search bound.
        f0_ceil: upper F0 search bound.
        frame_shift_ms: frame shift in milliseconds.
        use_reaper: when true use ``pyreaper.reaper``, otherwise
            ``pysptk.sptk.rapt``.

    Returns:
        The F0 track as float32. In the REAPER branch, values equal to -1
        are replaced by 0.
    """
    shift_sec = frame_shift_ms / 1000.0

    if not use_reaper:
        contour = pysptk.sptk.rapt(
            y.astype(np.float32),
            fs=sampling_rate,
            min=f0_floor,
            max=f0_ceil,
            hopsize=int(shift_sec * sampling_rate),
            voice_bias=0.3)
        return contour.astype(np.float32)

    _, _, _, contour, _ = pyreaper.reaper(
        y,
        fs=sampling_rate,
        minf0=f0_floor,
        maxf0=f0_ceil,
        frame_period=shift_sec)
    # Map the -1 marker to 0.
    contour[contour == -1.] = 0.
    return contour.astype(np.float32)
示例#4
0
文件: picking.py 项目: yuji1997/4dx
def voice_analysis():
    """Analyse a WAV file named on the command line with REAPER.

    ``sys.argv[1]`` is the base name of a file under ``../wav/`` (without the
    ``.wav`` extension); the remaining arguments are options. The last
    pitch-mark time is saved to ``../time_arr/`` both as ``.npy`` and as a
    text file. With the ``-debug`` option a 2x2 figure of the raw samples,
    pitch marks, F0 contour and correlations is shown.

    Returns:
        Tuple ``(row_data, f0, f0_times, _time)``: the samples read from the
        WAV file, the F0 track, its time axis, and the last pitch-mark time.
    """
    # Command line: base file name followed by optional flags.
    base_name = sys.argv[1]
    flags = sys.argv[2:]
    wav_path = "../wav/"+base_name+".wav"

    # Analysis
    fs, row_data = wavfile.read(wav_path)
    pm_times, pm, f0_times, f0, corr = pyreaper.reaper(row_data, fs)
    _time = pm_times[-1]

    # Persist the time length in both binary and text form; the text file is
    # written with an integer format.
    duration = np.array([_time])
    np.save("../time_arr/"+base_name+"_time.npy", duration)
    np.savetxt("../time_arr/"+base_name+"_time.txt", np.array([_time]), fmt='%d', delimiter=',')

    # -debug option: visual inspection of the analysis results.
    if "-debug" in flags:
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))

        # Raw sample data
        axes[0, 0].plot(row_data, label="Row_data")
        axes[0, 0].legend(fontsize=10)

        # Remaining panels: pitch marks, F0 contour, correlations.
        panels = (
            (axes[0, 1], pm_times, pm, "red", "Pitch mark"),
            (axes[1, 0], f0_times, f0, "green", "F0 contour"),
            (axes[1, 1], f0_times, corr, "blue", "Correlations"),
        )
        for axis, xs, ys, colour, caption in panels:
            axis.plot(xs, ys, linewidth=3, color=colour, label=caption)
            axis.legend(fontsize=10)

        plt.show()

    return row_data, f0, f0_times, _time
示例#5
0
def pyreaper_pitch(wavdata_int, fs, frame_shift, max_pitch, min_pitch,
                   high_pass, hilbert_transform, inter_mark):
    """Return F0 vector estimated by pyreaper (Python package) along with
       corresponding times

    Args:
        wavdata_int       - 16-bit integer data from WAV file [NumPy array]
        fs                - Sampling frequency in Hz [integer]
        frame_shift       - Length of each frame in ms [integer]
        max_pitch         - Maximum valid F0 allowed in Hz [integer]
        min_pitch         - Minimum valid F0 allowed in Hz [integer]
        high_pass         - Whether to apply high pass filter (80 Hz) to input
                            [Boolean]
        hilbert_transform - Whether to apply Hilbert transform that may reduce
                            phase distortion [Boolean]
        inter_mark        - Regular inter-mark interval to use in unvoiced
                            pitchmark regions given in milliseconds [integer]
    Returns:
        F0_times          - Times corresponding to F0 estimates [NumPy vector]
        F0                - F0 estimates [NumPy vector]; negative estimates
                            are replaced with NaN
    Raises:
        ImportError       - If the pyreaper package cannot be imported
    """
    # Only the import is guarded, so errors raised by reaper() itself are not
    # mistaken for a missing dependency.
    try:
        from pyreaper import reaper
    except ImportError: # pragma: no cover
        print("Need Python library pyreaper.  Is it installed?")
        # Propagate the failure: there is no F0 to return without pyreaper.
        raise

    # Millisecond arguments are converted to the seconds pyreaper expects.
    _, _, F0_times, F0, _ = reaper(wavdata_int, fs, minf0=min_pitch,
                                   maxf0=max_pitch,
                                   do_high_pass=high_pass,
                                   do_hilbert_transform=hilbert_transform,
                                   inter_pulse=inter_mark / 1000.0,
                                   frame_period=frame_shift / 1000.0)

    # Replace invalid measurements with NaN
    F0[F0 < 0] = np.nan

    return F0_times, F0
示例#6
0
def test_reaper():
    """Run REAPER ten times on the bundled 16 kHz test file.

    The check is implicit: each iteration must read the file and unpack a
    five-element result without raising.
    """
    wav_path = join(dirname(__file__), "test16k.wav")
    repetitions = 10
    for _ in tqdm(range(repetitions)):
        fs, x = wavfile.read(wav_path)
        _pm_times, _pm, _f0_times, _f0, _corr = reaper(x, fs)
示例#7
0
def get_pitch(fs, x):
    """Return the value that occurs most often in the REAPER F0 track of ``x``.

    Args:
        fs: sampling rate handed to ``pyreaper.reaper``.
        x: signal handed to ``pyreaper.reaper``.

    Returns:
        The most frequent F0 value; ties resolve to the smallest value
        because ``np.unique`` returns its values sorted.
    """
    _, _, _, f0_track, _ = pyreaper.reaper(x, fs)
    distinct, occurrences = np.unique(f0_track, return_counts=True)
    most_common = np.argmax(occurrences)
    return distinct[most_common]
def F0(data, rate):
    """Return the REAPER F0 track of ``data``.

    The search range and frame period are read from the module-level
    ``args`` object: ``args.f0_range`` supplies the bounds (its min and max
    are used) and ``args.f0_period`` is divided by 1000 before being passed
    as the frame period.
    """
    lowest = min(args.f0_range)
    highest = max(args.f0_range)
    period = args.f0_period/1000
    result = pyreaper.reaper(data, rate, minf0=lowest, maxf0=highest, frame_period=period)
    _pm_times, _pm, _f0_times, f0_track, _corr = result
    return f0_track
示例#9
0
def basic_analysis(wav, sample_rate):
    """Return the REAPER F0 track of ``wav`` reshaped to a single column.

    Args:
        wav: signal handed to ``pyreaper.reaper``.
        sample_rate: sampling rate handed to ``pyreaper.reaper``.

    Returns:
        The F0 track reshaped to ``(-1, 1)``.
    """
    _, _, _, f0_track, _ = pyreaper.reaper(wav, sample_rate)
    return f0_track.reshape(-1, 1)
示例#10
0
def wav2world(
        wave, fs,
        mcep_order=25, f0_smoothing=0,
        ap_smoothing=0, mcep_smoothing=0,
        frame_period=None, f0_floor=None, f0_ceil=None,
        f0_mode="harvest"):
    """Analyse a waveform into WORLD-style features.

    Args:
        wave: input signal; converted to float64 before analysis.
        fs: sampling rate.
        mcep_order: order passed to ``pysptk.mcep``.
        f0_smoothing: cut-off for smoothing the continuous log-F0; applied
            only when greater than 0.
        ap_smoothing: cut-off for smoothing the coded aperiodicity; applied
            only when greater than 0.
        mcep_smoothing: cut-off for smoothing the mel-cepstrum; applied only
            when greater than 0.
        frame_period: frame period in milliseconds; defaults to
            ``pyworld.default_frame_period``.
        f0_floor: lower F0 bound; defaults to ``pyworld.default_f0_floor``.
        f0_ceil: upper F0 bound; defaults to ``pyworld.default_f0_ceil``.
        f0_mode: F0 estimator, one of ``"harvest"``, ``"reaper"``, ``"dio"``.

    Returns:
        Tuple ``(mcep, clf0, vuv, cap, sp, fbin, t)``:
        mel-cepstrum, continuous log-F0, integer voiced/unvoiced flags,
        coded aperiodicity, spectral envelope from ``pyworld.cheaptrick``,
        ``sp.shape[1]``, and the frame times from the F0 estimator.

    Raises:
        ValueError: if ``f0_mode`` is not one of the supported estimators.
    """
    # setup default values
    wave = wave.astype('float64')

    frame_period = pyworld.default_frame_period \
        if frame_period is None else frame_period
    f0_floor = pyworld.default_f0_floor if f0_floor is None else f0_floor
    f0_ceil = pyworld.default_f0_ceil if f0_ceil is None else f0_ceil
    alpha = pysptk.util.mcepalpha(fs)

    # f0 -- each estimator also selects its own D4C threshold
    if f0_mode == "harvest":
        f0, t = pyworld.harvest(
            wave, fs,
            f0_floor=f0_floor, f0_ceil=f0_ceil,
            frame_period=frame_period)
        threshold = 0.85

    elif f0_mode == "reaper":
        # REAPER is fed 16-bit integers and a frame period in seconds.
        # NOTE(review): the scaling assumes wave lies in [-1, 1] -- confirm.
        _, _, t, f0, _ = reaper(
            (wave * (2**15 - 1)).astype("int16"),
            fs, frame_period=frame_period / 1000,
            do_hilbert_transform=True)
        t, f0 = t.astype('float64'), f0.astype('float64')
        threshold = 0.1

    elif f0_mode == "dio":
        # Pass the same range and frame period as the other estimators so the
        # frame timing matches the 1000 / frame_period rate used for smoothing.
        _f0, t = pyworld.dio(
            wave, fs,
            f0_floor=f0_floor, f0_ceil=f0_ceil,
            frame_period=frame_period)
        f0 = pyworld.stonemask(wave, _f0, t, fs)
        threshold = 0.0

    else:
        raise ValueError(
            "unknown f0_mode {!r}; expected 'harvest', 'reaper' or 'dio'"
            .format(f0_mode))

    # world
    sp = pyworld.cheaptrick(wave,  f0, t, fs)
    ap = pyworld.d4c(wave, f0, t, fs, threshold=threshold)

    # extract vuv from ap: voiced when the first aperiodicity bin is below
    # 0.5 and f0 is above 1.0
    vuv_flag = (ap[:, 0] < 0.5) * (f0 > 1.0)
    # astype copies, so vuv keeps the flags as they are before the edge
    # frames are forced to voiced below
    vuv = vuv_flag.astype('int')

    # continuous log f0
    clf0 = np.zeros_like(f0)
    if vuv_flag.any():
        # Pin both ends to the nearest voiced value so the interpolation
        # below never has to extrapolate.
        if not vuv_flag[0]:
            f0[0] = f0[vuv_flag][0]
            vuv_flag[0] = True
        if not vuv_flag[-1]:
            f0[-1] = f0[vuv_flag][-1]
            vuv_flag[-1] = True

        idx = np.arange(len(f0))
        clf0[idx[vuv_flag]] = np.log(
            np.clip(f0[idx[vuv_flag]], f0_floor / 2, f0_ceil * 2))
        # Fill unvoiced frames by interpolating the voiced log-F0 values.
        clf0[idx[~vuv_flag]] = interp1d(
            idx[vuv_flag], clf0[idx[vuv_flag]]
        )(idx[~vuv_flag])

        if f0_smoothing > 0:
            clf0 = modspec_smoothing(
                clf0, 1000 / frame_period, cut_off=f0_smoothing)
    else:
        # NOTE(review): this fills with f0_floor itself, not its logarithm,
        # unlike the voiced branch -- confirm whether that is intended.
        clf0 = np.ones_like(f0) * f0_floor

    # continuous coded ap
    cap = pyworld.code_aperiodicity(ap, fs)

    if ap_smoothing > 0:
        cap = modspec_smoothing(cap, 1000 / frame_period, cut_off=ap_smoothing)

    # mcep
    mcep = pysptk.mcep(sp, order=mcep_order, alpha=alpha, itype=4)

    if mcep_smoothing > 0:
        mcep = modspec_smoothing(
            mcep, 1000 / frame_period, cut_off=mcep_smoothing)

    fbin = sp.shape[1]
    return mcep, clf0, vuv, cap, sp, fbin, t