def generate_lf0(source_f0_folder, target_lf0_folder, file_lengths):
    """Convert plain-text F0 files to binary log-F0 (lf0) files.

    For each (file_id, file_length) pair, reads `<file_id>.f0` (one F0 value
    per line), pads with zeros or truncates to `file_length` frames, takes the
    natural log of voiced frames, and writes `<file_id>.lf0`.

    :param source_f0_folder: directory containing `<file_id>.f0` text files
    :param target_lf0_folder: directory receiving `<file_id>.lf0` binary files
    :param file_lengths: dict mapping file_id -> expected frame count
    """
    io_funcs = BinaryIOCollection()
    # items() works on both py2 and py3; iteritems() was py2-only
    for file_id, file_length in file_lengths.items():
        source_file_path = os.path.join(source_f0_folder, '{}.f0'.format(file_id))
        target_file_path = os.path.join(target_lf0_folder, '{}.lf0'.format(file_id))
        with open(source_file_path, 'rt') as handle:
            lines = handle.readlines()
        f0s = [float(line) for line in lines]
        # pad with 0.0 (unvoiced) or truncate so output matches expected length
        if len(f0s) <= file_length:
            f0s += [0.] * (file_length - len(f0s))
        else:
            f0s = f0s[:file_length]
        # frames with F0 <= 10 Hz are treated as unvoiced and keep the
        # large-negative marker value
        lf0s = [-1e+10] * file_length
        for i in range(file_length):  # range works on py2 and py3; xrange was py2-only
            if f0s[i] > 10.0:
                lf0s[i] = math.log(f0s[i])
        print(target_file_path)
        io_funcs.array_to_binary_file(lf0s, target_file_path)
    print('')  # was a bare py2 `print` statement (a no-op expression on py3)
def load_covariance(self, var_file_dict, out_dimension_dict):
    """Read each stream's variance file and cache it in self.var.

    :param var_file_dict: stream name -> path of its variance file
    :param out_dimension_dict: stream name -> dimensionality of that stream
    """
    reader = BinaryIOCollection()
    for stream_name in list(var_file_dict.keys()):
        raw_values, _frame_count = reader.load_binary_file_frame(var_file_dict[stream_name], 1)
        # store as a (dim, 1) column vector keyed by stream name
        self.var[stream_name] = numpy.reshape(raw_values, (out_dimension_dict[stream_name], 1))
def shift_for_one_utterance(self, utt, feat_dim, semi):
    """Pitch-shift one utterance's feature file by `semi` semitones.

    A shift of 0 is a no-op (no file written). Otherwise the shifted
    features are written next to the input with an `_u<n>` (up) or
    `_d<n>` (down) suffix. Returns the basename of the resulting file.
    """
    if semi == 0:
        return os.path.basename(utt)
    reader = BinaryIOCollection()
    feat, _num_frames = reader.load_binary_file_frame(utt, feat_dim)
    for frame in feat:
        # shift the current / previous / next pitch sub-vectors in place
        # (the *_start_ind / *_end_ind bounds come from enclosing scope)
        self.shift_pitch_feat(frame[curr_start_ind: curr_end_ind + 1], semi)
        self.shift_pitch_feat(frame[prev_start_ind: prev_end_ind + 1], semi)
        self.shift_pitch_feat(frame[next_start_ind: next_end_ind + 1], semi)
    suffix = '_u' + str(semi) if semi > 0 else '_d' + str(-semi)
    filename = utt + suffix
    reader.array_to_binary_file(feat, filename)
    return os.path.basename(filename)
class AlignFeats(object):
    """Warps source-speaker features onto a target timeline using a DTW path."""

    def __init__(self):
        # shared binary feature-file reader/writer
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim, dtw_path_dict):
        '''align source feats as per the dtw path (matching target length)'''
        src_features, _frame_count = self.io_funcs.load_binary_file_frame(src_feat_file, feat_dim)
        num_target_frames = len(dtw_path_dict)
        aligned = numpy.zeros((num_target_frames, feat_dim))
        for tgt_idx in range(num_target_frames):
            # copy the source frame that the DTW path maps onto this target frame
            aligned[tgt_idx, ] = src_features[dtw_path_dict[tgt_idx]]
        self.io_funcs.array_to_binary_file(aligned, src_aligned_feat_file)
def duration_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict):
    """Round generated state durations to whole frames and write one duration
    file per utterance, next to its input file.

    Only a single 'duration' output stream is supported; anything else aborts.
    """
    logger = logging.getLogger('param_generation')
    logger.debug('duration_decomposition for %d files' % len(in_file_list))

    state_number = 5  ## hard coding, try removing in future?

    feature_names = list(out_dimension_dict.keys())
    if len(feature_names) > 1:
        logger.critical("we don't support any additional features along with duration as of now.")
        sys.exit(1)
    feature_name = feature_names[0]

    io_funcs = BinaryIOCollection()
    total_files = len(in_file_list)
    for file_counter, file_name in enumerate(in_file_list, start=1):
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features, _frame_count = io_funcs.load_binary_file_frame(file_name, dimension)

        # round to integer frame counts; every state lasts at least one frame
        gen_features = numpy.int32(numpy.round(features))
        gen_features[gen_features < 1] = 1

        if dimension > state_number:
            gen_features = gen_features[:, state_number]

        logger.info('processing %4d of %4d: %s' % (file_counter, total_files, file_name))

        new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])
        io_funcs.array_to_binary_file(gen_features, new_file_name)
        logger.debug('wrote to file %s' % new_file_name)
class AlignFeats(object):
    """Aligns source features to a target timeline via a precomputed DTW mapping."""

    def __init__(self):
        # binary I/O helper used by all alignment methods
        self.io_funcs = BinaryIOCollection()

    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim, dtw_path_dict):
        '''align source feats as per the dtw path (matching target length)'''
        source, _n_frames = self.io_funcs.load_binary_file_frame(src_feat_file, feat_dim)
        length = len(dtw_path_dict)
        result = numpy.zeros((length, feat_dim))
        for frame_idx in range(length):
            # each target frame takes the DTW-mapped source frame
            result[frame_idx, ] = source[dtw_path_dict[frame_idx]]
        self.io_funcs.array_to_binary_file(result, src_aligned_feat_file)
def make_equal_frames(self, in_file_list, ref_file_list, in_dimension_dict):
    """Truncate or zero-pad each input file so its frame count matches the
    corresponding reference file, overwriting the input file in place.

    :param in_file_list: files to adjust (stream name taken from extension)
    :param ref_file_list: reference files defining the target frame counts
    :param in_dimension_dict: stream name (file extension) -> feature dim
    """
    logger = logging.getLogger("test")
    logger.info('making equal number of lines...')

    io_funcs = BinaryIOCollection()

    utt_number = len(in_file_list)
    for i in range(utt_number):  # range works on py2 and py3; xrange was py2-only
        in_file_name = in_file_list[i]
        in_data_stream_name = in_file_name.split('.')[-1]
        in_feature_dim = in_dimension_dict[in_data_stream_name]
        in_features, in_frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

        ref_file_name = ref_file_list[i]
        ref_data_stream_name = ref_file_name.split('.')[-1]
        ref_feature_dim = in_dimension_dict[ref_data_stream_name]
        ref_features, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, ref_feature_dim)

        if in_frame_number == ref_frame_number:
            continue  # already aligned; leave the file untouched

        # copy as many frames as both sides have; any extra target rows stay
        # zero (this collapses the original's two redundant elif branches)
        target_features = numpy.zeros((ref_frame_number, in_feature_dim))
        common_frames = min(in_frame_number, ref_frame_number)
        target_features[0:common_frames, ] = in_features[0:common_frames, ]

        io_funcs.array_to_binary_file(target_features, in_file_name)

    # NOTE: reports only the stream names of the last processed pair,
    # matching the original behavior
    logger.info('Finished: made equal rows in data stream %s with reference to data stream %s ' % (in_data_stream_name, ref_data_stream_name))
def prepare_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict):
    """Assemble per-stream acoustic features into composite (cmp) files.

    For each utterance, loads every configured data stream, interpolates F0
    over unvoiced regions, optionally records the voiced/unvoiced flag, and
    appends delta/acceleration features where configured; writes one binary
    output file per utterance.

    :param in_file_list_dict: stream name -> list of per-utterance input files
    :param out_file_list: list of per-utterance output (cmp) file paths
    :param in_dimension_dict: stream name -> input feature dimension
    :param out_dimension_dict: stream name -> output feature dimension
    """
    logger = logging.getLogger("acoustic_comp")

    # map each output stream to its starting column in the composite matrix
    stream_start_index = {}
    stream_dim_index = 0
    for stream_name in out_dimension_dict.keys():
        # `in` works on py2 and py3; dict.has_key() was removed in py3
        if stream_name not in stream_start_index:
            stream_start_index[stream_name] = stream_dim_index
        stream_dim_index += out_dimension_dict[stream_name]

    io_funcs = BinaryIOCollection()

    for i in range(self.file_number):  # range works on py2 and py3
        out_file_name = out_file_list[i]
        logger.info('processing file %4d of %4d : %s' % (i + 1, self.file_number, out_file_name))

        out_data_matrix = None
        out_frame_number = 0

        for k in range(self.data_stream_number):
            data_stream_name = self.data_stream_list[k]
            in_file_name = in_file_list_dict[data_stream_name][i]
            in_feature_dim = in_dimension_dict[data_stream_name]
            features, frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

            if k == 0:
                # the first stream fixes the frame count for this utterance
                out_frame_number = frame_number
                out_data_matrix = numpy.zeros((out_frame_number, self.out_dimension))

            if frame_number > out_frame_number:
                features = features[0:out_frame_number, ]
                frame_number = out_frame_number

            try:
                assert out_frame_number == frame_number
            except AssertionError:
                logger.critical(
                    'the frame number of data stream %s is not consistent with others: current %d others %d'
                    % (data_stream_name, out_frame_number, frame_number))
                raise

            dim_index = stream_start_index[data_stream_name]

            if data_stream_name in ['lf0', 'F0']:  ## F0 added for GlottHMM
                features, vuv_vector = self.interpolate_f0(features)
                ### if vuv information to be recorded, store it in corresponding column
                if self.record_vuv:
                    out_data_matrix[0:out_frame_number,
                                    stream_start_index['vuv']:stream_start_index['vuv'] + 1] = vuv_vector

            out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = features
            dim_index = dim_index + in_feature_dim

            if self.compute_dynamic[data_stream_name]:
                delta_features = self.compute_dynamic_matrix(features, self.delta_win, frame_number, in_feature_dim)
                acc_features = self.compute_dynamic_matrix(features, self.acc_win, frame_number, in_feature_dim)
                out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = delta_features
                dim_index = dim_index + in_feature_dim
                out_data_matrix[0:out_frame_number, dim_index:dim_index + in_feature_dim] = acc_features

        ### write data to file
        io_funcs.array_to_binary_file(out_data_matrix, out_file_name)
        logger.debug(' wrote %d frames of features', out_frame_number)
# NOTE(review): this span begins mid-way through a dict literal; the entries
# preceding 'vuv' are defined on lines above this view.
    'vuv' : '.vuv'}

# per-stream global variance files required by parameter generation (MLPG)
var_file_dict = {'mgc':'{}/mgc.var'.format(args.var_dir),
                 'vuv':'{}/vuv.var'.format(args.var_dir),
                 'lf0':'{}/lf0.var'.format(args.var_dir)}

generator = ParameterGeneration()
# out_dimension_dict is the cmp structure of your nnet output cmp
generator.acoustic_decomposition(in_file_list, 127, out_dimension_dict,
                                 file_extension_dict, var_file_dict)

if not os.path.exists(lf0_dir):
    os.mkdir(lf0_dir)
if not os.path.exists(mgc_dir):
    os.mkdir(mgc_dir)

# move the per-stream files produced by acoustic_decomposition out of cmp_dir
os.system('mv {}/*.lf0 {}'.format(cmp_dir, lf0_dir))
os.system('mv {}/*.mgc {}'.format(cmp_dir, mgc_dir))

io_funcs = BinaryIOCollection()
inf_float = -1.0e+10  # log-F0 marker value for unvoiced frames

for item in os.listdir(cmp_dir):
    # column 123 of the 127-dim cmp vector is read as the voiced/unvoiced flag
    # NOTE(review): 127 / 123 / 41 are hard-coded to this network's cmp
    # layout -- confirm against the model configuration
    vuv = numpy.reshape(numpy.fromfile(os.path.join(cmp_dir, item), dtype=numpy.float32), [-1,127])[:,123]
    name, ext = os.path.splitext(item)
    lf0 = numpy.reshape(numpy.fromfile(os.path.join(lf0_dir, "{}.lf0".format(name)),dtype=numpy.float32),[-1,1])
    mgc = numpy.reshape(numpy.fromfile(os.path.join(mgc_dir, "{}.mgc".format(name)),dtype=numpy.float32),[-1,41])
    # 3-point moving average along time to smooth the spectral trajectory
    mgc = signal.convolve2d(mgc, [[1.0 / 3], [1.0 / 3], [1.0 / 3]],
                            mode="same", boundary="symm")
    # re-impose unvoiced regions on the generated lf0
    lf0[vuv < 0.5] = inf_float
    io_funcs.array_to_binary_file(lf0, os.path.join(lf0_dir, "{}.lf0".format(name)))
    io_funcs.array_to_binary_file(mgc, os.path.join(mgc_dir, "{}.mgc".format(name)))
def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None):
    """Split composite cmp files into per-stream features, optionally run
    MLPG, and write one file per stream next to each input file.

    :param in_file_list: per-utterance composite (cmp) feature files
    :param dimension: total width of the composite feature vector
    :param out_dimension_dict: stream name -> dimensionality within the cmp
    :param file_extension_dict: stream name -> output file extension
    :param var_file_dict: stream name -> variance file (loaded into self.var)
    :param do_MLPG: if False, copy the raw stream instead of running MLPG
    :param cfg: configuration object; only consulted when self.enforce_silence
    """
    print('param_generation')
    print('acoustic_decomposition for %d files' % len(in_file_list))

    self.load_covariance(var_file_dict, out_dimension_dict)

    # column offsets of each stream inside the composite vector;
    # 'vuv' is tracked separately via vuv_dimension
    stream_start_index = {}
    dimension_index = 0
    vuv_dimension = None
    for feature_name in list(out_dimension_dict.keys()):
        if feature_name != 'vuv':
            stream_start_index[feature_name] = dimension_index
        else:
            vuv_dimension = dimension_index
        dimension_index += out_dimension_dict[feature_name]

    io_funcs = BinaryIOCollection()
    mlpg_algo = MLParameterGeneration()

    findex = 0
    flen = len(in_file_list)
    for file_name in in_file_list:
        findex = findex + 1
        dir_name = os.path.dirname(file_name)
        file_id = os.path.splitext(os.path.basename(file_name))[0]

        features, frame_number = io_funcs.load_binary_file_frame(file_name, dimension)
        print('processing %4d of %4d: %s' % (findex, flen, file_name))

        for feature_name in self.gen_wav_features:
            print(' feature: %s' % feature_name)
            current_features = features[:, stream_start_index[feature_name]:stream_start_index[feature_name] + out_dimension_dict[feature_name]]

            if FAST_MLPG:
                ### fast version wants variance per frame, not single global one:
                var = self.var[feature_name]
                var = numpy.transpose(numpy.tile(var, frame_number))
            else:
                var = self.var[feature_name]

            if do_MLPG == False:
                gen_features = current_features
            else:
                # static dimension is a third of (static + delta + acc)
                gen_features = mlpg_algo.generation(current_features, var, out_dimension_dict[feature_name] // 3)

            print(' feature dimensions: %d by %d' % (gen_features.shape[0], gen_features.shape[1]))

            if feature_name in ['lf0', 'F0']:
                # NOTE(review): 'vuv' is never inserted into stream_start_index
                # above (it goes to vuv_dimension), so this guard is never true
                # as written -- confirm whether it should test vuv_dimension.
                if 'vuv' in stream_start_index:
                    vuv_feature = features[:, stream_start_index['vuv']:stream_start_index['vuv'] + 1]
                    for i in range(frame_number):
                        # BUG FIX: original referenced undefined name
                        # `new_vuv_feature` (NameError); use vuv_feature.
                        if vuv_feature[i] < 0.5:
                            gen_features[i, 0] = self.inf_float

            new_file_name = os.path.join(dir_name, file_id + file_extension_dict[feature_name])

            if self.enforce_silence:
                silence_pattern = cfg.silence_pattern
                label_align_dir = cfg.in_label_align_dir
                # FIX: close the label file when done (original leaked the handle)
                with open(label_align_dir + '/' + file_id + '.lab', 'r') as in_f:
                    for line in in_f.readlines():
                        line = line.strip()
                        if len(line) < 1:
                            continue
                        temp_list = re.split('\s+', line)
                        # HTK label times are in 100 ns units; convert to 5 ms frame indices
                        start_time = int(int(temp_list[0]) * (10 ** -4) / 5)
                        end_time = int(int(temp_list[1]) * (10 ** -4) / 5)
                        full_label = temp_list[2]
                        label_binary_flag = self.check_silence_pattern(full_label, silence_pattern)
                        if label_binary_flag:
                            # silence regions: mark F0-like streams unvoiced, zero the rest
                            if feature_name in ['lf0', 'F0', 'mag']:
                                gen_features[start_time:end_time, :] = self.inf_float
                            else:
                                gen_features[start_time:end_time, :] = 0.0

            io_funcs.array_to_binary_file(gen_features, new_file_name)
            print(' wrote to file %s' % new_file_name)
def compute_distortion(self, file_id_list, reference_dir, generation_dir, file_ext, feature_dim):
    """Compute an objective distortion between reference and generated features.

    Return value depends on file_ext:
      '.lf0' -> (rmse, f0_corr, vuv_error_rate) over voiced frames
      '.dur' -> (rmse, corr) over per-utterance summed durations
      other  -> mean distortion per frame (e.g. MCD for '.mgc')

    :raises ValueError: when a reference/generated pair differ in frame count
    """
    total_voiced_frame_number = 0
    distortion = 0.0
    vuv_error = 0
    total_frame_number = 0

    io_funcs = BinaryIOCollection()

    # accumulated data across all files, used for correlation measures
    ref_all_files_data = numpy.reshape(numpy.array([]), (-1, 1))
    gen_all_files_data = numpy.reshape(numpy.array([]), (-1, 1))

    for file_id in file_id_list:
        ref_file_name = reference_dir + '/' + file_id.strip() + file_ext
        gen_file_name = generation_dir + '/' + file_id.strip() + file_ext
        ref_data, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, feature_dim)
        gen_data, gen_frame_number = io_funcs.load_binary_file_frame(gen_file_name, feature_dim)
        if ref_frame_number != gen_frame_number:
            self.logger.critical(
                "The number of frames is not the same: %d vs %d. Error in compute_distortion.py\n."
                % (ref_frame_number, gen_frame_number))
            # BUG FIX: original used a bare `raise` with no active exception,
            # which itself raises a confusing RuntimeError/TypeError.
            raise ValueError("frame count mismatch: %d vs %d"
                             % (ref_frame_number, gen_frame_number))

        if file_ext == '.lf0':
            ref_all_files_data = numpy.concatenate((ref_all_files_data, ref_data), axis=0)
            gen_all_files_data = numpy.concatenate((gen_all_files_data, gen_data), axis=0)
            temp_distortion, temp_vuv_error, voiced_frame_number = self.compute_f0_mse(ref_data, gen_data)
            vuv_error += temp_vuv_error
            total_voiced_frame_number += voiced_frame_number
        elif file_ext == '.dur':
            # durations are compared per utterance: sum state durations per frame
            ref_data = numpy.reshape(numpy.sum(ref_data, axis=1), (-1, 1))
            gen_data = numpy.reshape(numpy.sum(gen_data, axis=1), (-1, 1))
            ref_all_files_data = numpy.concatenate((ref_all_files_data, ref_data), axis=0)
            gen_all_files_data = numpy.concatenate((gen_all_files_data, gen_data), axis=0)
            continue
        elif file_ext == '.mgc':
            # skip dimension 0 (energy) when computing spectral distortion
            temp_distortion = self.compute_mse(ref_data[:, 1:feature_dim], gen_data[:, 1:feature_dim])
        else:
            temp_distortion = self.compute_mse(ref_data, gen_data)

        distortion += temp_distortion
        total_frame_number += ref_frame_number

    if file_ext == '.dur':
        dur_rmse = self.compute_rmse(ref_all_files_data, gen_all_files_data)
        dur_corr = self.compute_corr(ref_all_files_data, gen_all_files_data)
        return dur_rmse, dur_corr
    elif file_ext == '.lf0':
        distortion /= float(total_voiced_frame_number)
        vuv_error /= float(total_frame_number)
        distortion = numpy.sqrt(distortion)
        f0_corr = self.compute_f0_corr(ref_all_files_data, gen_all_files_data)
        return distortion, f0_corr, vuv_error
    else:
        distortion /= float(total_frame_number)
        return distortion
def __init__(self):
    """Set up the binary feature-file reader/writer shared by this class."""
    self.io_funcs = BinaryIOCollection()
# NOTE(review): the next line is the tail of a method whose `def` lies above
# this view; indentation follows the original class-interior level.
    return phone, st_arr, ph_arr, mean_f0_arr

    def zeros(self, m, n):
        """Return an m-vector (n == 1) or m-by-n matrix of float zeros
        (MATLAB-style helper)."""
        if(n == 1):
            arr = np.ndarray((m,), float)
        else:
            arr = np.ndarray((m, n), float)
        arr.fill(0)
        return arr

if __name__ == "__main__":
    # paths are hard-coded to this researcher's AFS workspace
    dnn_dir = '/afs/inf.ed.ac.uk/group/cstr/projects/phd/s1432486/work/dnn_tts_blzpilot/'
    label_align_dir = os.path.join(dnn_dir, 'two_stage_mtldnn/data/label_state_align')
    htsclass = readHTSlabelFile()
    io_funcs = BinaryIOCollection()
    DFP = 1
    if DFP:
        parseLabFile = True;
        if parseLabFile:
            filelist = os.path.join(dnn_dir, 'two_stage_mtldnn/data/file_id_list.scp')
            list_of_files = io_funcs.load_file_list(filelist)
            # tracks the longest syllable duration seen so far
            max_syl_dur = 0
            max_syl_dur_filename = ''
            for i in range(len(list_of_files)):
                filename = list_of_files[i]
                # Python 2 print statement (this script is py2-only)
                print filename
def load_next_partition(self):
    """Fill one training buffer with frame-aligned (input, output) features.

    Files are fetched on demand over SFTP, loaded, then deleted locally.
    Leftover frames from an utterance that did not fit the previous buffer are
    consumed first; an utterance overflowing this buffer is carried over to
    the next call via self.*_remain_data. Returns a (shared_set, i, o) triple
    with both matrices shuffled in unison (same seed).
    This is Python 2 code (print statements).
    """
    print 'Loading Next Partition'
    # NOTE(review): credentials are placeholders ('******') -- presumably
    # substituted elsewhere; confirm before use.
    sftp = pysftp.Connection('eddie.ecdf.ed.ac.uk',username='******',password='******')
    i_temp_set = numpy.empty((self.buffer_size, self.n_ins))
    o_temp_set = numpy.empty((self.buffer_size, self.n_outs))
    current_index = 0
    ### first check whether there are remaining data from previous utterance
    if self.remain_frame_number > 0:
        remain_frames = self.remain_frame_number - current_index
        remain_size = self.i_remain_data.shape[0]
        if remain_frames != remain_size:
            if remain_frames < remain_size:
                # small mismatches are only reported; large ones drop the
                # carried-over frames entirely
                if remain_size - remain_frames <= 20:
                    print "Check remain_size"
                else:
                    print "We delete", self.remain_frame_number,"frames"
                    self.remain_frame_number = 0
            elif remain_frames > remain_size:
                self.remain_frame_number = self.remain_frame_number - (remain_frames - remain_size)
        i_temp_set[current_index:self.remain_frame_number, ] = self.i_remain_data
        o_temp_set[current_index:self.remain_frame_number, ] = self.o_remain_data
        current_index += self.remain_frame_number
        self.remain_frame_number = 0
    io_function = BinaryIOCollection()
    while True:
        if current_index >= self.buffer_size:
            break
        if self.file_index >= self.list_size:
            # end of file list: flag it and wrap around for the next epoch
            self.end_reading = True
            self.file_index = 0
            break
        sftp.get(self.i_list[self.file_index])
        sftp.get(self.o_list[self.file_index])
        i_file_name = self.extract_file_id(self.i_list[self.file_index])
        o_file_name = self.extract_file_id(self.o_list[self.file_index])
        remove_items = []
        # retry with the next file pair until both local copies exist
        while (os.path.isfile(i_file_name) and os.path.isfile(o_file_name)) is False:
            print i_file_name," is :", os.path.isfile(i_file_name), "in beginning of load_next_partition"
            print o_file_name," is :", os.path.isfile(o_file_name), "in beginning of load_next_partition"
            remove_items.append(i_file_name)
            remove_items.append(o_file_name)
            self.file_index +=1
            sftp.get(self.i_list[self.file_index])
            sftp.get(self.o_list[self.file_index])
            i_file_name = self.extract_file_id(self.i_list[self.file_index])
            o_file_name = self.extract_file_id(self.o_list[self.file_index])
        remove_items.append(i_file_name)
        remove_items.append(o_file_name)
        i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name , self.n_ins)
        o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
        # the fetched copies are temporary: delete them once loaded
        for item in remove_items:
            if os.path.isfile(item) is True:
                os.remove(item)
        remove_items = []
        frame_number = lab_frame_number
        if abs(lab_frame_number - out_frame_number) < 5:    ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
            if lab_frame_number > out_frame_number:
                frame_number = out_frame_number
        else:
            # skip file pairs whose frame counts disagree badly
            while abs(lab_frame_number - out_frame_number) > 5:
                self.file_index += 1
                #############
                while (os.path.isfile(i_file_name) and os.path.isfile(o_file_name)) is False:
                    print i_file_name," is :", os.path.isfile(i_file_name), "in ELSE of load_next_partition"
                    print o_file_name," is :", os.path.isfile(o_file_name), "in ELSE of load_next_partition"
                    remove_items.append(i_file_name)
                    remove_items.append(o_file_name)
                    sftp.get(self.i_list[self.file_index])
                    sftp.get(self.o_list[self.file_index])
                    i_file_name = self.extract_file_id(self.i_list[self.file_index])
                    o_file_name = self.extract_file_id(self.o_list[self.file_index])
                remove_items.append(i_file_name)
                remove_items.append(o_file_name)
                i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name , self.n_ins)
                o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
                for item in remove_items:
                    if os.path.isfile(item) is True:
                        os.remove(item)
                remove_items = []
                #############
            # NOTE(review): frame_number is not refreshed from the newly
            # loaded lab_frame_number/out_frame_number after this loop --
            # looks stale; confirm intended behavior.
        o_features = o_features[0:frame_number, ]
        i_features = i_features[0:frame_number, ]
        if current_index + frame_number <= self.buffer_size:
            i_temp_set[current_index:current_index+frame_number, ] = i_features
            o_temp_set[current_index:current_index+frame_number, ] = o_features
            current_index = current_index + frame_number
        else:   ## if current utterance cannot be stored in the block, then leave the remaining part for the next block
            used_frame_number = self.buffer_size - current_index
            i_temp_set[current_index:self.buffer_size, ] = i_features[0:used_frame_number, ]
            o_temp_set[current_index:self.buffer_size, ] = o_features[0:used_frame_number, ]
            current_index = self.buffer_size
            self.i_remain_data = i_features[used_frame_number:frame_number, ]
            self.o_remain_data = o_features[used_frame_number:frame_number, ]
            self.remain_frame_number = frame_number - used_frame_number
        self.file_index += 1
    sftp.close()
    i_temp_set = i_temp_set[0:current_index, ]
    o_temp_set = o_temp_set[0:current_index, ]
    # identical seed before each shuffle keeps inputs and outputs aligned
    numpy.random.seed(271639)
    numpy.random.shuffle(i_temp_set)
    numpy.random.seed(271639)
    numpy.random.shuffle(o_temp_set)
    i_shared_set = self.make_shared(i_temp_set, 'x')
    o_shared_set = self.make_shared(o_temp_set, 'y')
    io_shared_set = (i_shared_set, o_shared_set)
    return io_shared_set, i_temp_set, o_temp_set
def load_next_utterance(self):
    """Fetch one utterance's input/output feature files over SFTP and return
    them as a (shared_set, i, o) triple. Python 2 code (print statements)."""
    print("Loading Next Utterance")
    i_temp_set = numpy.empty((self.buffer_size, self.n_ins))
    o_temp_set = numpy.empty((self.buffer_size, self.n_outs))
    io_function = BinaryIOCollection()
    # NOTE(review): credentials are placeholders ('******') -- confirm how
    # they are provided in deployment.
    sftp = pysftp.Connection('eddie.ecdf.ed.ac.uk', username='******', password='******')
    sftp.get(self.i_list[self.file_index])
    sftp.get(self.o_list[self.file_index])
    i_file_name = self.extract_file_id(self.i_list[self.file_index])
    o_file_name = self.extract_file_id(self.o_list[self.file_index])
    remove_items = []
    # advance to the next file pair until both local copies exist
    while (os.path.isfile(i_file_name) and os.path.isfile(o_file_name)) is False:
        print i_file_name, " is :", os.path.isfile(i_file_name), "in load_next_utterance"
        print o_file_name, " is :", os.path.isfile(o_file_name), "in load_next_utterance"
        remove_items.append(i_file_name)
        remove_items.append(o_file_name)
        self.file_index += 1
        sftp.get(self.i_list[self.file_index])
        sftp.get(self.o_list[self.file_index])
        i_file_name = self.extract_file_id(self.i_list[self.file_index])
        o_file_name = self.extract_file_id(self.o_list[self.file_index])
    remove_items.append(i_file_name)
    remove_items.append(o_file_name)
    sftp.close()
    i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name, self.n_ins)
    o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
    # fetched copies are temporary: delete them once loaded
    for item in remove_items:
        if os.path.isfile(item) is True:
            os.remove(item)
    remove_items = []
    frame_number = lab_frame_number
    if abs(lab_frame_number - out_frame_number) < 5:  ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
        if lab_frame_number > out_frame_number:
            frame_number = out_frame_number
    else:
        print "The number of frames in label and acoustic features are different : ", lab_frame_number, " vs ", out_frame_number
        # NOTE(review): bare `raise` with no active exception raises a
        # TypeError/RuntimeError rather than a meaningful error -- confirm.
        raise
    # NOTE(review): inputs and outputs appear swapped here (i_temp_set gets
    # o_features and vice versa), contradicting the buffer shapes allocated
    # above; the sibling load_next_partition assigns them the other way
    # round -- confirm whether this swap is intentional.
    i_temp_set = o_features[0:frame_number, ]
    o_temp_set = i_features[0:frame_number, ]
    self.file_index += 1
    if self.file_index >= self.list_size:
        # end of file list: flag it and wrap around for the next epoch
        self.end_reading = True
        self.file_index = 0
    i_shared_set = self.make_shared(i_temp_set, 'x')
    o_shared_set = self.make_shared(o_temp_set, 'y')
    io_shared_set = (i_shared_set, o_shared_set)
    return io_shared_set, i_temp_set, o_temp_set
import time
import soundfile as sf
import libutils as lu
from scipy import signal
from scipy import interpolate

# Debug:
#from libdevhelpers import *

# tools location:
# Hard coded for nom. TODO: Change
_sptk_bin_dir = os.environ[
    'HOME'] + '/Dropbox/Projects/sublimetext_as_python_ide/common/SPTK-3.7'
_reaper_bin = os.environ[
    'HOME'] + '/Dropbox/Projects/sublimetext_as_python_ide/common/REAPER/build/reaper'

# module-level binary reader/writer and constants
_io = BinaryIOCollection()
_curr_dir = os.path.dirname(os.path.realpath(__file__))
MAGIC = -1.0E+10  # logarithm floor (the same as SPTK)


#------------------------------------------------------------------------------
def shift_to_pm(v_shift):
    """Convert per-frame shift intervals to absolute pitch-mark positions
    (cumulative sum)."""
    v_pm = np.cumsum(v_shift)
    return v_pm


#------------------------------------------------------------------------------
def pm_to_shift(v_pm):
    """Convert absolute pitch-mark positions back to shift intervals
    (first difference, anchored at 0)."""
    v_shift = np.diff(np.hstack((0, v_pm)))
    return v_shift
def load_next_partition(self):
    """Load one buffer of frame-aligned (input, output) training data.

    File pairs are pulled over SFTP, read, and the local copies removed.
    Any frames left over from an utterance that overflowed the previous
    buffer are placed first; an utterance overflowing this buffer is stashed
    in self.i_remain_data / self.o_remain_data for the next call. Both
    matrices are shuffled with the same seed so rows stay paired.
    Python 2 code (print statements).
    """
    print 'Loading Next Partition'
    # NOTE(review): '******' credentials are placeholders -- confirm source.
    sftp = pysftp.Connection('eddie.ecdf.ed.ac.uk', username='******', password='******')
    i_temp_set = numpy.empty((self.buffer_size, self.n_ins))
    o_temp_set = numpy.empty((self.buffer_size, self.n_outs))
    current_index = 0
    ### first check whether there are remaining data from previous utterance
    if self.remain_frame_number > 0:
        remain_frames = self.remain_frame_number - current_index
        remain_size = self.i_remain_data.shape[0]
        if remain_frames != remain_size:
            if remain_frames < remain_size:
                # tolerate small mismatches; large ones drop the carry-over
                if remain_size - remain_frames <= 20:
                    print "Check remain_size"
                else:
                    print "We delete", self.remain_frame_number, "frames"
                    self.remain_frame_number = 0
            elif remain_frames > remain_size:
                self.remain_frame_number = self.remain_frame_number - (
                    remain_frames - remain_size)
        i_temp_set[current_index:self.remain_frame_number, ] = self.i_remain_data
        o_temp_set[current_index:self.remain_frame_number, ] = self.o_remain_data
        current_index += self.remain_frame_number
        self.remain_frame_number = 0
    io_function = BinaryIOCollection()
    while True:
        if current_index >= self.buffer_size:
            break
        if self.file_index >= self.list_size:
            # exhausted the list: mark end-of-epoch and wrap around
            self.end_reading = True
            self.file_index = 0
            break
        sftp.get(self.i_list[self.file_index])
        sftp.get(self.o_list[self.file_index])
        i_file_name = self.extract_file_id(self.i_list[self.file_index])
        o_file_name = self.extract_file_id(self.o_list[self.file_index])
        remove_items = []
        # keep advancing until both local copies of a pair exist
        while (os.path.isfile(i_file_name)
               and os.path.isfile(o_file_name)) is False:
            print i_file_name, " is :", os.path.isfile(i_file_name), "in beginning of load_next_partition"
            print o_file_name, " is :", os.path.isfile(o_file_name), "in beginning of load_next_partition"
            remove_items.append(i_file_name)
            remove_items.append(o_file_name)
            self.file_index += 1
            sftp.get(self.i_list[self.file_index])
            sftp.get(self.o_list[self.file_index])
            i_file_name = self.extract_file_id(self.i_list[self.file_index])
            o_file_name = self.extract_file_id(self.o_list[self.file_index])
        remove_items.append(i_file_name)
        remove_items.append(o_file_name)
        i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name, self.n_ins)
        o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
        # the downloaded copies are temporary; remove after loading
        for item in remove_items:
            if os.path.isfile(item) is True:
                os.remove(item)
        remove_items = []
        frame_number = lab_frame_number
        if abs(lab_frame_number - out_frame_number) < 5:  ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
            if lab_frame_number > out_frame_number:
                frame_number = out_frame_number
        else:
            # large mismatch: skip forward until a consistent pair is found
            while abs(lab_frame_number - out_frame_number) > 5:
                self.file_index += 1
                #############
                while (os.path.isfile(i_file_name)
                       and os.path.isfile(o_file_name)) is False:
                    print i_file_name, " is :", os.path.isfile(i_file_name), "in ELSE of load_next_partition"
                    print o_file_name, " is :", os.path.isfile(o_file_name), "in ELSE of load_next_partition"
                    remove_items.append(i_file_name)
                    remove_items.append(o_file_name)
                    sftp.get(self.i_list[self.file_index])
                    sftp.get(self.o_list[self.file_index])
                    i_file_name = self.extract_file_id(self.i_list[self.file_index])
                    o_file_name = self.extract_file_id(self.o_list[self.file_index])
                remove_items.append(i_file_name)
                remove_items.append(o_file_name)
                i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name, self.n_ins)
                o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
                for item in remove_items:
                    if os.path.isfile(item) is True:
                        os.remove(item)
                remove_items = []
                #############
            # NOTE(review): frame_number is not updated from the newly loaded
            # counts after this recovery loop -- appears stale; confirm.
        o_features = o_features[0:frame_number, ]
        i_features = i_features[0:frame_number, ]
        if current_index + frame_number <= self.buffer_size:
            i_temp_set[current_index:current_index + frame_number, ] = i_features
            o_temp_set[current_index:current_index + frame_number, ] = o_features
            current_index = current_index + frame_number
        else:  ## if current utterance cannot be stored in the block, then leave the remaining part for the next block
            used_frame_number = self.buffer_size - current_index
            i_temp_set[current_index:self.buffer_size, ] = i_features[0:used_frame_number, ]
            o_temp_set[current_index:self.buffer_size, ] = o_features[0:used_frame_number, ]
            current_index = self.buffer_size
            self.i_remain_data = i_features[used_frame_number:frame_number, ]
            self.o_remain_data = o_features[used_frame_number:frame_number, ]
            self.remain_frame_number = frame_number - used_frame_number
        self.file_index += 1
    sftp.close()
    i_temp_set = i_temp_set[0:current_index, ]
    o_temp_set = o_temp_set[0:current_index, ]
    # re-seeding with the same constant keeps the two shuffles in lockstep
    numpy.random.seed(271639)
    numpy.random.shuffle(i_temp_set)
    numpy.random.seed(271639)
    numpy.random.shuffle(o_temp_set)
    i_shared_set = self.make_shared(i_temp_set, 'x')
    o_shared_set = self.make_shared(o_temp_set, 'y')
    io_shared_set = (i_shared_set, o_shared_set)
    return io_shared_set, i_temp_set, o_temp_set
#src_aligned_lf0_dir = os.path.join(src_aligned_feat_dir, "lf0") #if not os.path.exists(src_aligned_mag_dir): # os.mkdir(src_aligned_mag_dir) #if not os.path.exists(src_aligned_bap_dir): # os.mkdir(src_aligned_bap_dir) #if not os.path.exists(src_aligned_lf0_dir): # os.mkdir(src_aligned_lf0_dir) ################################################################# ######## align source feats with target feats using dtw ## ###### ################################################################# io_funcs = BinaryIOCollection() aligner = AlignFeats() def get_mag_filelist(mag_dir): mag_files = [] for file in os.listdir(mag_dir): whole_filepath = os.path.join(mag_dir, file) if os.path.isfile(whole_filepath) and str(whole_filepath).endswith( ".mag"): mag_files.append(whole_filepath) elif os.path.isdir(whole_filepath): mag_files += get_mag_filelist(whole_filepath) mag_files.sort()
def zeros(self, m, n):
    """Return an m-vector (n == 1) or m-by-n matrix of float zeros
    (MATLAB-style helper).

    NOTE(review): takes `self`, so this is presumably a method whose class
    header lies outside this view -- confirm enclosing class.
    """
    if (n == 1):
        arr = np.ndarray((m, ), float)
    else:
        arr = np.ndarray((m, n), float)
    arr.fill(0)
    return arr


if __name__ == "__main__":
    # paths are hard-coded to this researcher's AFS workspace
    dnn_dir = '/afs/inf.ed.ac.uk/group/cstr/projects/phd/s1432486/work/dnn_tts_blzpilot/'
    label_align_dir = os.path.join(dnn_dir,
                                   'two_stage_mtldnn/data/label_state_align')
    htsclass = readHTSlabelFile()
    io_funcs = BinaryIOCollection()
    DFP = 1
    if DFP:
        parseLabFile = True
        if parseLabFile:
            filelist = os.path.join(dnn_dir,
                                    'two_stage_mtldnn/data/file_id_list.scp')
            list_of_files = io_funcs.load_file_list(filelist)
            # track the longest syllable duration encountered
            max_syl_dur = 0
            max_syl_dur_filename = ''
            for i in range(len(list_of_files)):
                filename = list_of_files[i]
                # Python 2 print statement (this script is py2-only)
                print filename
def load_next_utterance(self):
    """Fetch a single utterance's input/output feature pair over SFTP and
    return it as a (shared_set, i, o) triple. Python 2 code."""
    print ("Loading Next Utterance")
    i_temp_set = numpy.empty((self.buffer_size, self.n_ins))
    o_temp_set = numpy.empty((self.buffer_size, self.n_outs))
    io_function = BinaryIOCollection()
    # NOTE(review): placeholder credentials ('******') -- confirm source.
    sftp = pysftp.Connection('eddie.ecdf.ed.ac.uk',username='******',password='******')
    sftp.get(self.i_list[self.file_index])
    sftp.get(self.o_list[self.file_index])
    i_file_name = self.extract_file_id(self.i_list[self.file_index])
    o_file_name = self.extract_file_id(self.o_list[self.file_index])
    remove_items = []
    # skip forward until both local copies of a pair exist
    while (os.path.isfile(i_file_name) and os.path.isfile(o_file_name)) is False:
        print i_file_name," is :", os.path.isfile(i_file_name), "in load_next_utterance"
        print o_file_name," is :", os.path.isfile(o_file_name), "in load_next_utterance"
        remove_items.append(i_file_name)
        remove_items.append(o_file_name)
        self.file_index +=1
        sftp.get(self.i_list[self.file_index])
        sftp.get(self.o_list[self.file_index])
        i_file_name = self.extract_file_id(self.i_list[self.file_index])
        o_file_name = self.extract_file_id(self.o_list[self.file_index])
    remove_items.append(i_file_name)
    remove_items.append(o_file_name)
    sftp.close()
    i_features, lab_frame_number = io_function.load_binary_file_frame(i_file_name, self.n_ins)
    o_features, out_frame_number = io_function.load_binary_file_frame(o_file_name, self.n_outs)
    # downloaded copies are temporary; remove once loaded
    for item in remove_items:
        if os.path.isfile(item) is True:
            os.remove(item)
    remove_items = []
    frame_number = lab_frame_number
    if abs(lab_frame_number - out_frame_number) < 5:  ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
        if lab_frame_number > out_frame_number:
            frame_number = out_frame_number
    else:
        print "The number of frames in label and acoustic features are different : ",lab_frame_number, " vs " , out_frame_number
        # NOTE(review): bare `raise` with no active exception is itself an
        # error (TypeError/RuntimeError) -- confirm intended behavior.
        raise
    # NOTE(review): i/o appear swapped here (i_temp_set receives o_features),
    # inconsistent with the buffer shapes allocated above and with the
    # sibling load_next_partition -- confirm whether intentional.
    i_temp_set = o_features[0:frame_number, ]
    o_temp_set = i_features[0:frame_number, ]
    self.file_index += 1
    if self.file_index >= self.list_size:
        # end of list: flag end-of-epoch and wrap around
        self.end_reading = True
        self.file_index = 0
    i_shared_set = self.make_shared(i_temp_set, 'x')
    o_shared_set = self.make_shared(o_temp_set, 'y')
    io_shared_set = (i_shared_set, o_shared_set)
    return io_shared_set, i_temp_set, o_temp_set