def gen_W(number_of_frame, dur_list, num_coeff):
    """Assemble a block-diagonal DCT basis matrix for a frame sequence.

    One (num_coeff x d) DCT block (from PoGUtility.generate_W_for_DCT) is
    placed per duration in ``dur_list``, tiling the frame axis so that
    block i covers the frames of segment i only.

    Args:
        number_of_frame: total number of frames (columns of the result).
        dur_list: per-segment frame counts; the last entry is replaced by
            whatever remains so the blocks exactly cover all frames.
        num_coeff: number of DCT coefficients (rows) per segment.

    Returns:
        numpy array of shape (num_coeff * len(dur_list), number_of_frame).
    """
    basis = np.zeros((num_coeff * len(dur_list), number_of_frame))
    col = 0
    row = 0
    last_idx = len(dur_list) - 1
    for idx, dur in enumerate(dur_list):
        # Force the final segment to absorb any leftover frames so the
        # column offsets sum exactly to number_of_frame.
        if idx == last_idx:
            dur = number_of_frame - col
        block = PoGUtility.generate_W_for_DCT(dur, num_coeff)
        # 2-D slice assignment is equivalent to copying the block row by row.
        basis[row:row + num_coeff, col:col + dur] = block
        col += dur
        row += num_coeff
    return basis
# NOTE(review): this is the interior of a larger loop (note the `continue`);
# `phone`, `phone_id`, `coeff`, `phone_clean_dict`, `new_phone_dict` and
# `find_head_and_tail` are defined by enclosing code not visible here.
# Trim leading/tail frames flagged by find_head_and_tail (presumably
# unvoiced head/tail counts — TODO confirm) from the raw lf0 contour.
head_count, tail_count = find_head_and_tail(phone['raw_lf0'])
raw = np.array(phone['raw_lf0'])[head_count : len(phone['raw_lf0'])-tail_count]
# print raw
if len(raw) < 10:
    # Too few voiced frames to fit a DCT reliably — skip this phone.
    continue
else:
    if len(raw[raw<0]) != 0:
        # Negative lf0 values mark unvoiced gaps; fill them by
        # interpolation (project helper; name has a typo upstream —
        # "inteporate" — which cannot be fixed here).
        # print raw
        raw = Utility.inteporate_lf0(raw)
        # print raw
    phone['clean_raw'] = raw
    phone_clean_dict[phone_id] = phone
    # DCT analysis: project the cleaned contour onto `coeff` basis rows.
    w = PoGUtility.generate_W_for_DCT(len(raw), coeff)
    data_dct = np.dot(w, raw)
    # print data_dct
    phone['dct'] = data_dct
# sys.exit()
# Presumably executed once after the enclosing loop finishes — the
# collapsed source does not show the original indentation; verify.
print len(phone_clean_dict)
Utility.save_obj(phone_clean_dict, new_phone_dict)
pass
def gen_W(number_of_frame, dur_list, num_coeff, stress_list, vuv, tone, stress_type, p_in_s_file, use_consonant=False):
    """Build a block-diagonal DCT basis matrix, zeroing non-matching syllables.

    For each duration in ``dur_list`` a (num_coeff x d) block is placed on
    the diagonal of the result.  The block is an all-zero matrix unless the
    syllable's stress/tone labels match ``stress_type``/``tone`` and its
    voiced region fits inside the segment; otherwise a DCT basis for the
    voiced frames is generated and padded with zero columns for the
    unvoiced head/tail (and, optionally, an initial consonant region).

    Args:
        number_of_frame: total frame count (columns of the result).
        dur_list: per-syllable frame counts; last entry is replaced by the
            remaining frames so blocks exactly tile the frame axis.
        num_coeff: DCT coefficients (rows) per syllable.
        stress_list: per-syllable label pairs; st[0] is a stress flag and
            st[1] a tone label — assumed strings, TODO confirm.
        vuv: per-frame voiced/unvoiced flags — presumably; used only via
            PoGUtility.find_head_and_tail.
        tone, stress_type: only matching syllables get a non-zero block.
        p_in_s_file: per-syllable timing lists; p_in_s[0] / 50000.0 is
            converted to a consonant frame count (50000 presumably equals
            one frame shift in HTK 100 ns units — TODO confirm).
        use_consonant: when True, additionally skip a consonant region at
            the start of each syllable.

    Returns:
        numpy array of shape (num_coeff * len(dur_list), number_of_frame).

    Side effect: when use_consonant is False, st[0] is overwritten with '0'
    in-place for non-'1' entries, mutating the caller's ``stress_list``.
    """
    # print stress_list
    w = np.zeros((num_coeff * len(dur_list), number_of_frame))
    offset_x = 0  # current column (frame) offset
    offset_y = 0  # current row (coefficient) offset
    for idx, (d, p_in_s, st) in enumerate(zip(dur_list, p_in_s_file, stress_list)):
        # The last syllable absorbs any remaining frames.
        if idx == (len(dur_list) - 1):
            d = number_of_frame - offset_x
        if use_consonant:
            if len(p_in_s) == 0:
                consonant = 0
            else:
                # Convert the first phone duration to frames.
                consonant = int(float(p_in_s[0]) / float(50000.0))
            cur_vuv = vuv[offset_x + consonant:offset_x + d]
            head, tail = PoGUtility.find_head_and_tail(cur_vuv)
            # NOTE(review): `|` is bitwise-or on booleans (works, but `or`
            # would be idiomatic); also stress is hard-coded to '1' here
            # while the else-branch compares against stress_type — confirm
            # this asymmetry is intentional.
            if (((consonant + tail + head) > d) | (not (st[0] == '1')) | (not (st[1] == '{}'.format(tone)))):
                # Non-matching or degenerate syllable: zero block.
                local_w = np.zeros((num_coeff, d))
            else:
                # print 'panda'
                # DCT basis over voiced frames only, zero-padded back to
                # width d: [consonant | head | basis | tail].
                voice_frame = d - consonant - tail - head
                local_w = PoGUtility.generate_W_for_DCT(
                    voice_frame, num_coeff)
                if head != 0:
                    local_w = np.concatenate((np.zeros(
                        (num_coeff, head), dtype='float'), local_w), axis=1)
                if tail != 0:
                    local_w = np.concatenate(
                        (local_w, np.zeros(
                            (num_coeff, tail), dtype='float')), axis=1)
                if consonant != 0:
                    local_w = np.concatenate((np.zeros(
                        (num_coeff, consonant), dtype='float'), local_w), axis=1)
        else:
            cur_vuv = vuv[offset_x:offset_x + d]
            head, tail = PoGUtility.find_head_and_tail(cur_vuv)
            # In-place normalization of the stress flag (mutates caller data).
            if not (st[0] == '1'):
                st[0] = '0'
            if (((tail + head) > d) | (not (st[0] == str(stress_type))) | (not (st[1] == '{}'.format(tone)))):
                local_w = np.zeros((num_coeff, d))
            else:
                # print 'panda'
                # DCT basis over voiced frames, zero-padded: [head | basis | tail].
                voice_frame = d - tail - head
                local_w = PoGUtility.generate_W_for_DCT(
                    voice_frame, num_coeff)
                if head != 0:
                    local_w = np.concatenate((np.zeros(
                        (num_coeff, head), dtype='float'), local_w), axis=1)
                if tail != 0:
                    local_w = np.concatenate(
                        (local_w, np.zeros(
                            (num_coeff, tail), dtype='float')), axis=1)
        # Copy the block into the big matrix row by row.
        for i in range(num_coeff):
            w[offset_y + i][offset_x:offset_x + d] = local_w[i]
        offset_x = offset_x + d
        offset_y = offset_y + num_coeff
    return w
# NOTE(review): script fragment — `d` (dict of name -> contour) and the
# PoGUtility / sklearn names come from enclosing code not visible here.
# Accumulators for DCT reconstruction-error statistics.
errors = dict()
errors_list = []
errors_tuple = []  # not filled in this span; presumably used further down
true = np.array([])
dct_regen = np.array([])
for coeff in [3, 4, 7]:
    for name in d:
        data = d[name]
        # DCT analysis with `coeff` basis rows, then inverse DCT back to
        # the original length to measure reconstruction error.
        # (A redundant `data_dct = PoGUtility.generate_DCT(data, coeff)`
        # call was removed here: its result was immediately overwritten
        # by the np.dot below.)
        w = PoGUtility.generate_W_for_DCT(len(data), coeff)
        data_dct = np.dot(w, data)
        i_dct = PoGUtility.generate_inverse_DCT(data_dct, len(data))
        true = np.concatenate((true, data))
        dct_regen = np.concatenate((dct_regen, i_dct))
        # RMSE scaled by 1200/ln(2): presumably converts a natural-log-f0
        # error to musical cents — TODO confirm units.
        rmse = np.sqrt(sklearn.metrics.mean_squared_error(
            data, i_dct)) * 1200 / np.log(2)
        # print rmse
        # NOTE(review): keyed by name only, so each later `coeff` pass
        # overwrites the previous one — `errors` ends up holding coeff=7
        # results only; confirm that is intended.
        errors[name] = rmse
        errors_list.append(rmse)
from scipy.fftpack import dct, idct
import math

# NOTE(review): Python 2 debug script (print statements). `Utility`,
# `PoGUtility` and `sys` are imported elsewhere in the file (not visible
# in this chunk).
if __name__ == '__main__':
    # Load the pickled {syllable_id: [lf0, delta, delta-delta]} dictionary
    # — structure presumed from the filename; TODO confirm.
    syllable_dict = Utility.load_obj(
        '/work/w2/decha/Data/GPR_speccom_data/Interspeech2017/syllable_dictionary_data_with_delta_deltadelta.pkl'
    )
    for syl in syllable_dict:
        print syl
        lf0 = syllable_dict[syl][0]
        # Hand-computed 0th (k=0) unnormalized DCT-II coefficient of lf0.
        # NOTE(review): `y` is never used below — presumably a sanity check
        # left over from debugging the DCT implementation.
        y = 0.0
        for n, f in enumerate(lf0):
            y = y + f * math.cos(math.pi * 0 * (2.0 * n + 1) / (2.0 * len(lf0)))
        # NOTE(review): `W` is also unused — likely kept to compare the
        # project DCT basis against scipy's dct.
        W = PoGUtility.generate_W_for_DCT(len(lf0), len(lf0))
        lf0_dct = dct(lf0, norm='ortho')
        print lf0_dct
        # Deliberately stop after the first syllable (debug run).
        sys.exit()
    pass