示例#1
0
def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    '''
    Compute MCEP features from a spectrum matrix by running the external
    SPTK "mcep" binary (_sptk_mcep_bin) over temporary files.

    m_sp:     spectrum data; written to a temporary file with lu.write_binfile.
    n_coeffs: number of coefficients; mcep is run with "-m n_coeffs - 1" and
              the result is read back with lu.read_binfile(temp_mgc, n_coeffs).
    alpha:    value passed to the "-a" option of mcep.
    in_type:  value passed to the "-q" option of mcep.
    fft_len:  value passed to the "-l" option of mcep. If 0, it is derived
              from the second dimension of m_sp as 2 * (np.size(m_sp, 1) - 1).

    Returns the data read from the mcep output file.
    '''

    # Pre:
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # Value comparison (not "is"): identity against an int literal is
        # implementation dependent and fails for e.g. NumPy integer zeros.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP. Shell equivalent:
        # $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)

    finally:
        # Deleting temp files, also when one of the steps above failed:
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
示例#2
0
def sp_to_mcep(m_sp, n_coeffs=60, alpha=0.77, in_type=3, fft_len=0):
    '''
    Compute MCEP features from a spectrum matrix by running the external
    SPTK "mcep" binary (_sptk_mcep_bin) over temporary files.

    m_sp:     spectrum data; written to a temporary file with lu.write_binfile.
    n_coeffs: number of coefficients; mcep is run with "-m n_coeffs - 1" and
              the result is read back with lu.read_binfile(temp_mgc, n_coeffs).
    alpha:    value passed to the "-a" option of mcep.
    in_type:  value passed to the "-q" option of mcep.
    fft_len:  value passed to the "-l" option of mcep. If 0, it is derived
              from the second dimension of m_sp as 2 * (np.size(m_sp, 1) - 1).

    Returns the data read from the mcep output file.
    '''

    # Pre:
    temp_sp = lu.ins_pid('temp.sp')
    temp_mgc = lu.ins_pid('temp.mgc')

    try:
        # Writing input data:
        lu.write_binfile(m_sp, temp_sp)

        # Value comparison (not "is"): identity against an int literal is
        # implementation dependent and fails for e.g. NumPy integer zeros.
        if fft_len == 0:  # case fft automatic
            fft_len = 2 * (np.size(m_sp, 1) - 1)

        # MCEP. Shell equivalent:
        # $sptk/mcep -a $alpha -m $mcsize -l $nFFT -e 1.0E-8 -j 0 -f 0.0 -q 3 $sp_dir/$sentence.sp > $mgc_dir/$sentence.mgc
        curr_cmd = _sptk_mcep_bin + " -a %1.2f -m %d -l %d -e 1.0E-8 -j 0 -f 0.0 -q %d %s > %s" % (
            alpha, n_coeffs - 1, fft_len, in_type, temp_sp, temp_mgc)
        call(curr_cmd, shell=True)

        # Read MGC File:
        m_mgc = lu.read_binfile(temp_mgc, n_coeffs)

    finally:
        # Deleting temp files, also when one of the steps above failed:
        for temp_file in (temp_sp, temp_mgc):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    return m_mgc
def analysis(wav_file, fft_len):
    '''
    Analyse a wav file and return its magnitude, real, imaginary and f0 features.

    wav_file: path of the wav file to analyse.
    fft_len:  passed through to
              mp.analysis_with_del_comp__ph_enc__f0_norm__from_files_raw.

    Returns the tuple (m_mag, m_real, m_imag, v_f0), where v_f0 is obtained
    from the extracted shifts with mp.shift_to_f0(..., out='f0', b_smooth=True).
    '''
    est_file = lu.ins_pid('temp.est')
    try:
        # REAPER must run on the same wav file that is analysed below
        # (the original referenced the undefined name "wav_file_orig").
        la.reaper(wav_file, est_file)
        m_mag, m_real, m_imag, v_shift, v_voi, m_frm, fs = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files_raw(wav_file, est_file, fft_len)
        v_f0 = mp.shift_to_f0(v_shift, v_voi, fs, out='f0', b_smooth=True)
    finally:
        # Remove the temporary est file even if the analysis failed:
        if os.path.exists(est_file):
            os.remove(est_file)
    return m_mag, m_real, m_imag, v_f0
def convert(file_id_list,
            in_lab_dir,
            in_feats_dir,
            fs,
            out_lab_dir,
            b_prevent_zeros=False):
    '''
    Convert the state aligned label files listed in file_id_list into
    variable frame rate label files, writing one '.lab' file per entry.

    file_id_list:    file with the list of file names (without extension),
                     read with lu.read_text_file2 using comments='#'.
    in_lab_dir:      directory containing the input '<name>.lab' files.
    in_feats_dir:    directory containing the '<name>.shift' files.
    fs:              passed through to mp.get_num_of_frms_per_state.
    out_lab_dir:     directory for the converted '.lab' files (created with lu.mkdir).
    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
                     (not recommended, only useful when there are too many utterances crashed)

    A file whose conversion raises an exception does not stop the run: its
    name is appended to the file named by lu.ins_pid('crash_file_list.scp').
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list,
                                     dtype='string',
                                     comments='#')

    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename +
              '................................')

        # Current i/o files:
        in_lab_file = os.path.join(in_lab_dir, filename + '.lab')
        out_lab_file = os.path.join(out_lab_dir, filename + '.lab')

        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')

        try:
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(
                v_shift,
                in_lab_file,
                fs,
                b_prevent_zeros=b_prevent_zeros,
                n_states_x_phone=1)
            la.convert_label_state_align_to_var_frame_rate(
                in_lab_file, v_n_frms, out_lab_file)

        # "Exception" excludes KeyboardInterrupt and SystemExit, so those
        # still propagate and stop the whole run.
        except Exception:
            print("crashlist")
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')
    print('Done!')
def analysis(wav_file, fft_len, mvf, nbins_mel=60, nbins_phase=45):
    '''
    Analyse a wav file and return its mel-warped magnitude, real and
    imaginary features together with lf0.

    wav_file:    path of the wav file to analyse.
    fft_len:     passed through to the mp analysis routine.
    mvf:         passed through to the mp analysis routine.
    nbins_mel:   forwarded as mag_mel_nbins.
    nbins_phase: forwarded as cmplx_ph_mel_nbins.

    Returns the tuple (m_mag_mel_log, m_real_mel, m_imag_mel, v_lf0).
    '''
    # REAPER writes its output to a temporary file named by lu.ins_pid:
    tmp_est = lu.ins_pid('temp.est')
    la.reaper(wav_file, tmp_est)

    # Keyword options forwarded to the analysis routine:
    analysis_opts = dict(f0_type='lf0',
                         mag_mel_nbins=nbins_mel,
                         cmplx_ph_mel_nbins=nbins_phase)

    # The shifts and the sample rate are extracted too, but not returned:
    (m_mag_mel_log, m_real_mel, m_imag_mel,
     _v_shift, v_lf0, _fs) = mp.analysis_with_del_comp__ph_enc__f0_norm__from_files2(
        wav_file, tmp_est, fft_len, mvf, **analysis_opts)

    # The temporary file is no longer needed:
    os.remove(tmp_est)

    return m_mag_mel_log, m_real_mel, m_imag_mel, v_lf0
示例#6
0
def get_pitch_marks(v_sig, fs):
    '''
    Estimate the pitch marks of a signal by running REAPER over temporary files.

    v_sig: signal samples; written to a temporary wav file with sf.write.
    fs:    sample rate, used to write the wav file and to convert the last
           mark into a sample position.

    Returns a 1-D array taken from the first column of the REAPER output
    (first 7 lines of the file skipped). Marks that are not greater than the
    mark preceding them are dropped, as is a last mark that falls on or
    beyond the last sample. The array is empty if the output has no data rows.
    '''

    temp_wav = lu.ins_pid('temp.wav')
    temp_pm = lu.ins_pid('temp.pm')

    try:
        sf.write(temp_wav, v_sig, fs)
        reaper(temp_wav, temp_pm)
        m_pm = np.loadtxt(temp_pm, skiprows=7)
    finally:
        # Removing temp files, also when one of the steps above failed:
        for temp_file in (temp_wav, temp_pm):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    # No data rows: nothing to index or to protect.
    if m_pm.size == 0:
        return np.zeros(0)

    # loadtxt returns a 1-D array when there is a single data row;
    # atleast_2d restores the (rows, columns) layout before taking column 0.
    v_pm = np.atleast_2d(m_pm)[:, 0]

    # Protection against REAPER bugs 1:
    vb_correct = np.hstack((True, np.diff(v_pm) > 0))
    v_pm = v_pm[vb_correct]

    # Protection against REAPER bugs 2 (maybe I need a better protection):
    if (v_pm[-1] * fs) >= (np.size(v_sig) - 1):
        v_pm = v_pm[:-1]

    return v_pm
示例#7
0
def get_pitch_marks(v_sig, fs):
    '''
    Estimate the pitch marks of a signal by running REAPER over temporary files.

    v_sig: signal samples; written to a temporary wav file with sf.write.
    fs:    sample rate, used to write the wav file and to convert the last
           mark into a sample position.

    Returns a 1-D array taken from the first column of the REAPER output
    (first 7 lines of the file skipped). Marks that are not greater than the
    mark preceding them are dropped, as is a last mark that falls on or
    beyond the last sample. The array is empty if the output has no data rows.
    '''

    temp_wav = lu.ins_pid('temp.wav')
    temp_pm = lu.ins_pid('temp.pm')

    try:
        sf.write(temp_wav, v_sig, fs)
        reaper(temp_wav, temp_pm)
        m_pm = np.loadtxt(temp_pm, skiprows=7)
    finally:
        # Removing temp files, also when one of the steps above failed:
        for temp_file in (temp_wav, temp_pm):
            if os.path.exists(temp_file):
                os.remove(temp_file)

    # No data rows: nothing to index or to protect.
    if m_pm.size == 0:
        return np.zeros(0)

    # loadtxt returns a 1-D array when there is a single data row;
    # atleast_2d restores the (rows, columns) layout before taking column 0.
    v_pm = np.atleast_2d(m_pm)[:, 0]

    # Protection against REAPER bugs 1:
    vb_correct = np.hstack((True, np.diff(v_pm) > 0))
    v_pm = v_pm[vb_correct]

    # Protection against REAPER bugs 2 (maybe I need a better protection):
    if (v_pm[-1] * fs) >= (np.size(v_sig) - 1):
        v_pm = v_pm[:-1]

    return v_pm
def convert(file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir, b_prevent_zeros=False):
    '''
    Convert the state aligned label files listed in file_id_list into
    variable frame rate label files, writing one '.lab' file per entry.

    file_id_list:    file with the list of file names (without extension),
                     read with lu.read_text_file2 using comments='#'.
    in_lab_dir:      directory containing the input '<name>.lab' files.
    in_feats_dir:    directory containing the '<name>.shift' files.
    fs:              passed through to mp.get_num_of_frms_per_state.
    out_lab_dir:     directory for the converted '.lab' files (created with lu.mkdir).
    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
                     (not recommended, only useful when there are too many utterances crashed)

    A file whose conversion raises an exception does not stop the run: its
    name is appended to the file named by lu.ins_pid('crash_file_list.scp').
    '''

    # Conversion:
    lu.mkdir(out_lab_dir)
    v_filenames = lu.read_text_file2(file_id_list, dtype='string', comments='#')

    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for filename in v_filenames:

        # Display:
        print('\nConverting lab file: ' + filename + '................................')

        # Current i/o files:
        in_lab_file = os.path.join(in_lab_dir, filename + '.lab')
        out_lab_file = os.path.join(out_lab_dir, filename + '.lab')

        in_shift_file = os.path.join(in_feats_dir, filename + '.shift')

        try:
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros)

            la.convert_label_state_align_to_var_frame_rate(in_lab_file, v_n_frms, out_lab_file)

        # "Exception" excludes KeyboardInterrupt and SystemExit, so those
        # still propagate and stop the whole run.
        except Exception:
            with open(crashlist_file, "a") as crashlistlog:
                crashlistlog.write(filename + '\n')

    print('Done!')
示例#9
0
    # CONSTANTS: So far, the vocoder has been tested only with the following constants:===
    fs = 48000

    # INPUT:==============================================================================
    files_scp = '../data/file_id.scp'  # List of file names (tokens). Format used by Merlin.
    in_lab_st_dir = '../data/labs'  # Original state aligned label files directory (in the format used by Merlin).
    in_shift_dir = '../data/params'  # Directory containing .shift files (You need to run feature extraction before running this script.)
    out_lab_st_dir = '../data/labs_var_rate'  # Directory that will contain the converted "variable frame rate" state aligned label files.
    b_prevent_zeros = False  # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed)

    # PROCESSING:=========================================================================
    lu.mkdir(out_lab_st_dir)
    v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#')
    n_files = len(v_fileTokns)

    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for ftkn in v_fileTokns:

        # Display:
        print('\nAnalysing file: ' + ftkn + '................................')

        # Input files:
        in_lab_st_file = in_lab_st_dir + '/' + ftkn + '.lab'
        out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab'
        in_shift_file = in_shift_dir + '/' + ftkn + '.shift'

        try:
            v_shift = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(
                v_shift,
                in_lab_st_file,
    fs = 48000

    # INPUT:==============================================================================
    files_scp      = '../data_48k/file_id.scp'   # List of file names (tokens). Format used by Merlin.
    in_lab_st_dir  = '../data_48k/labs'          # Original state aligned label files directory (in the format used by Merlin).
    in_shift_dir   = '../data_48k/params'        # Directory containing .shift files (You need to run feature extraction before running this script.)
    out_lab_st_dir = '../data_48k/labs_var_rate' # Directory that will contain the converted "variable frame rate" state aligned label files.
    b_prevent_zeros = False                  # True if you want to ensure that all the phonemes have one frame at least. (not recommended, only useful when there are too many utterances crashed)


    # PROCESSING:=========================================================================
    lu.mkdir(out_lab_st_dir)
    v_fileTokns = lu.read_text_file2(files_scp, dtype='string', comments='#')
    n_files = len(v_fileTokns)
    
    crashlist_file = lu.ins_pid('crash_file_list.scp')
    for ftkn in v_fileTokns:
        
        # Display:
        print('\nAnalysing file: ' + ftkn + '................................')
        
        # Input files:
        in_lab_st_file  = in_lab_st_dir  + '/' + ftkn + '.lab'
        out_lab_st_file = out_lab_st_dir + '/' + ftkn + '.lab'
        in_shift_file   = in_shift_dir   + '/' + ftkn + '.shift'

        try:
            v_shift  = lu.read_binfile(in_shift_file, dim=1)
            v_n_frms = mp.get_num_of_frms_per_state(v_shift, in_lab_st_file, fs, b_prevent_zeros=b_prevent_zeros, n_states_x_phone=5, nfrms_tolerance=6)

            # Extraction: