Example #1
0
def get_world_feats(vocals):
    """Extract WORLD vocoder features from a mono waveform.

    Runs pyworld's WORLD analysis at ``config.fs`` / ``config.hoptime`` and
    returns, per frame: 60 compressed harmonic-spectrum coefficients, 4
    compressed aperiodicity coefficients, the f0 track on the MIDI note
    scale, and a voiced/unvoiced flag.

    Parameters
    ----------
    vocals : np.ndarray
        1-D waveform sampled at ``config.fs`` (pyworld expects contiguous
        float64 — assumed here; TODO confirm against callers).

    Returns
    -------
    np.ndarray
        Shape (n_frames, 66) feature matrix: [harm(60), ap(4), f0, vuv].

    Raises
    ------
    ValueError
        If ``config.comp_mode`` is neither ``'mfsc'`` nor ``'mgc'``.
    """
    # feats = (f0, spectral envelope, aperiodicity)
    feats = pw.wav2world(vocals, config.fs, frame_period=config.hoptime * 1000)

    # Convert aperiodicity and harmonic envelope to a dB-like log scale.
    ap = feats[2].reshape([feats[1].shape[0],
                           feats[1].shape[1]]).astype(np.float32)
    ap = 10. * np.log10(ap**2)
    harm = 10 * np.log10(feats[1].reshape(
        [feats[2].shape[0], feats[2].shape[1]]))
    f0 = feats[0]

    # Hz -> MIDI note number; unvoiced frames (f0 == 0) map to -inf.
    y = 69 + 12 * np.log2(f0 / 440)
    nans, x = utils.nan_helper(y)
    naners = np.isinf(y)  # True where the frame is unvoiced
    # Fill NaN gaps in the pitch track by linear interpolation.
    y[nans] = np.interp(x(nans), x(~nans), y[~nans])
    y = np.array(y).reshape([len(y), 1])
    guy = np.array(naners).reshape([len(y), 1])
    y = np.concatenate((y, guy), axis=-1)

    # Compress the spectra: 60 harmonic and 4 aperiodicity coefficients,
    # warping factor 0.45.
    if config.comp_mode == 'mfsc':
        harmy = sp_to_mfsc(harm, 60, 0.45)
        apy = sp_to_mfsc(ap, 4, 0.45)
    elif config.comp_mode == 'mgc':
        harmy = sp_to_mgc(harm, 60, 0.45)
        apy = sp_to_mgc(ap, 4, 0.45)
    else:
        # Previously an unknown mode fell through to an UnboundLocalError
        # on harmy/apy below; fail with a clear message instead.
        raise ValueError("Unknown comp_mode: {!r}".format(config.comp_mode))

    out_feats = np.concatenate((harmy, apy, y.reshape((-1, 2))), axis=1)

    return out_feats
Example #2
0
def stft_to_feats(vocals, fs, mode=config.comp_mode):
    """Extract WORLD vocoder features from a waveform.

    Per frame: 60 compressed harmonic-spectrum coefficients, 4 compressed
    aperiodicity coefficients, the f0 track on the MIDI note scale, and a
    voiced/unvoiced flag.

    NOTE(review): this name is redefined later in this file; if both
    definitions share a module, this one is shadowed — confirm intent.

    Parameters
    ----------
    vocals : np.ndarray
        Waveform; multi-channel input is reduced to its first channel.
    fs : int
        Sampling rate of ``vocals`` in Hz.
    mode : str
        Spectrum compression mode, ``'mfsc'`` or ``'mgc'``.

    Returns
    -------
    np.ndarray
        Shape (n_frames, 66) feature matrix: [harm(60), ap(4), f0, vuv].

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'mfsc'`` nor ``'mgc'``.
    """
    # Keep only the first channel; pyworld needs a contiguous 1-D array.
    if len(vocals.shape) > 1:
        vocals = vocals[:, 0]
        vocals = np.ascontiguousarray(vocals)

    # feats = (f0, spectral envelope, aperiodicity); frame period in ms.
    feats = pw.wav2world(vocals, fs, frame_period=5.80498866)

    # Convert aperiodicity and harmonic envelope to a dB-like log scale.
    ap = feats[2].reshape([feats[1].shape[0], feats[1].shape[1]]).astype(np.float32)
    ap = 10. * np.log10(ap**2)
    harm = 10 * np.log10(feats[1].reshape([feats[2].shape[0], feats[2].shape[1]]))
    f0 = feats[0]

    # Hz -> MIDI note number; unvoiced frames (f0 == 0) map to -inf.
    y = 69 + 12 * np.log2(f0 / 440)
    nans, x = nan_helper(y)
    naners = np.isinf(y)  # True where the frame is unvoiced
    # Fill NaN gaps in the pitch track by linear interpolation.
    y[nans] = np.interp(x(nans), x(~nans), y[~nans])
    y = np.array(y).reshape([len(y), 1])
    guy = np.array(naners).reshape([len(y), 1])
    y = np.concatenate((y, guy), axis=-1)

    # Compress the spectra: 60 harmonic and 4 aperiodicity coefficients,
    # warping factor 0.45.
    if mode == 'mfsc':
        harmy = sp_to_mfsc(harm, 60, 0.45)
        apy = sp_to_mfsc(ap, 4, 0.45)
    elif mode == 'mgc':
        harmy = sp_to_mgc(harm, 60, 0.45)
        apy = sp_to_mgc(ap, 4, 0.45)
    else:
        # Previously an unknown mode fell through to an UnboundLocalError
        # on harmy/apy below; fail with a clear message instead.
        raise ValueError("Unknown mode: {!r}".format(mode))

    out_feats = np.concatenate((harmy, apy, y.reshape((-1, 2))), axis=1)

    return out_feats
def stft_to_feats(vocals, fs=config.fs):
    """Extract WORLD vocoder features (mfsc harmonics + warped-band
    aperiodicity) and the raw f0 track from a waveform.

    NOTE(review): this redefines ``stft_to_feats`` from earlier in the file
    with a different signature and return value — confirm which one callers
    expect.

    Parameters
    ----------
    vocals : np.ndarray
        Waveform; multi-channel input is reduced to its first channel.
    fs : int
        Sampling rate of ``vocals`` in Hz (defaults to ``config.fs``).

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(out_feats, f0)`` where ``out_feats`` has shape (n_frames, 66):
        [harm(60), ap(4), f0_midi, vuv], and ``f0`` is the raw Hz track.
    """
    # Keep only the first channel; pyworld needs a contiguous 1-D array.
    if len(vocals.shape) > 1:
        vocals = vocals[:, 0]
        vocals = np.ascontiguousarray(vocals)

    # BUG FIX: the fs parameter was previously ignored (config.fs was
    # always passed to wav2world); honor the caller's sample rate.
    # Default behaviour (fs == config.fs) is unchanged.
    feats = pw.wav2world(vocals, fs, frame_period=config.hoptime * 1000)

    # Convert aperiodicity and harmonic envelope to a dB-like log scale.
    ap = feats[2].reshape([feats[1].shape[0], feats[1].shape[1]]).astype(np.float32)
    ap = 10. * np.log10(ap**2)
    harm = 10 * np.log10(feats[1].reshape([feats[2].shape[0], feats[2].shape[1]]))
    harm = harm - 20  # empirical level offset, presumably undone at synthesis — TODO confirm
    f0 = feats[0]

    # Interpolate aperiodicity over unvoiced frames (f0 == 0) so the
    # features vary smoothly; skip when the whole clip is unvoiced.
    is_voiced = f0 > 0.0
    if np.any(is_voiced):
        unvoiced_idx = np.where(~is_voiced)[0]  # hoisted: loop-invariant
        voiced_idx = np.where(is_voiced)[0]
        for k in range(ap.shape[1]):
            ap[unvoiced_idx, k] = np.interp(unvoiced_idx, voiced_idx, ap[is_voiced, k])

    # Hz -> MIDI note number; unvoiced frames (f0 == 0) map to -inf.
    y = 69 + 12 * np.log2(f0 / 440)
    nans, x = nan_helper(y)
    naners = np.isinf(y)  # True where the frame is unvoiced
    # Fill NaN gaps in the pitch track by linear interpolation.
    y[nans] = np.interp(x(nans), x(~nans), y[~nans])
    y = np.array(y).reshape([len(y), 1])
    guy = np.array(naners).reshape([len(y), 1])
    y = np.concatenate((y, guy), axis=-1)

    # Guard against NaN/inf left over from the log of zero-valued bins.
    harm = np.nan_to_num(harm)
    ap = np.nan_to_num(ap)

    # Compress: 60 mel-frequency spectral coefficients for the harmonics,
    # 4 warped-band aperiodicity coefficients; epsilon avoids log(0).
    harmy = sp_to_mfsc(harm + 1e-12, 60, 0.45)
    apy = ap_to_wbap(ap + 1e-12, 4, config.fs)

    out_feats = np.concatenate((harmy, apy, y.reshape((-1, 2))), axis=1)

    return out_feats, f0