def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 2 ###
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list), max(train_fnum_list) + 1, self.bucket_range))

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)

    train_file_number = len(train_x)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list >= min_seq_length) & (train_fnum_list < max_seq_length)]
            if len(sub_train_list) == 0:
                continue

            train_idx_list = sum([train_flen['framenum2utt'][framenum] for framenum in sub_train_list], [])

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, shuffle=False, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_bucket_model_without_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    train_count_list = list(train_flen['framenum2utt'].keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_count_list)

    train_file_number = len(train_x)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for sequence_length in train_count_list:
            train_idx_list = train_flen['framenum2utt'][sequence_length]

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=sequence_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=sequence_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_recurrent_model_batchsize_one(self, train_x, train_y, num_of_epochs, shuffle_data):
    ### if batch size is equal to 1 ###
    train_idx_list = list(train_x.keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_idx_list)

    train_file_number = len(train_idx_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for file_name in train_idx_list:
            temp_train_x = train_x[file_name]
            temp_train_y = train_y[file_name]

            temp_train_x = np.reshape(temp_train_x, (1, temp_train_x.shape[0], self.n_in))
            temp_train_y = np.reshape(temp_train_y, (1, temp_train_y.shape[0], self.n_out))

            self.model.train_on_batch(temp_train_x, temp_train_y)
            #self.model.fit(temp_train_x, temp_train_y, epochs=1, shuffle=False, verbose=0)

            file_num += 1
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_padding_model(self, train_x, train_y, valid_x, valid_y, train_flen):
    # TODO: use packaged params
    ### Method 1 ###
    train_id_list = list(train_flen['utt2framenum'].keys())

    if self.shuffle_data:
        random.seed(271638)
        random.shuffle(train_id_list)

    train_file_number = len(train_id_list)

    for epoch_num in range(self.num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, self.num_of_epochs))
        file_num = 0
        while file_num < train_file_number:
            # take the next batch_size utterances and pad them to the longest one in the batch
            train_idx_list = train_id_list[file_num: file_num + self.batch_size]
            seq_len_arr = [train_flen['utt2framenum'][filename] for filename in train_idx_list]
            max_seq_length = max(seq_len_arr)

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)

            self.model.train_on_batch(temp_train_x, temp_train_y)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
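# --- Illustrative sketch, not part of the original class ---
# train_padding_model above relies on data_utils.transform_data_to_3d_matrix to zero-pad
# every utterance in a mini-batch up to max_seq_length. The helper below is a minimal
# NumPy equivalent of that assumed behaviour; its name and exact semantics are hypothetical.
def _pad_batch_to_3d_example(feature_dict, max_length):
    """Zero-pad a dict of (frames, feat_dim) arrays into one (batch, max_length, feat_dim) array."""
    names = sorted(feature_dict.keys())
    feat_dim = feature_dict[names[0]].shape[1]
    batch = np.zeros((len(names), max_length, feat_dim), dtype=np.float32)
    for i, name in enumerate(names):
        utt = feature_dict[name]
        n = min(utt.shape[0], max_length)
        batch[i, :n, :] = utt[:n]
    return batch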
def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    train_id_list = list(train_flen['utt2framenum'].keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_id_list)

    train_file_number = len(train_id_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        while file_num < train_file_number:
            train_idx_list = train_id_list[file_num: file_num + batch_size]

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, seq_length=self.seq_length, merge_size=self.merge_size)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, seq_length=self.seq_length, merge_size=self.merge_size)

            self.model.train_on_batch(temp_train_x, temp_train_y)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def predict(self, test_x, out_scaler, gen_test_file_list, sequential_training=False, stateful=False):
    #### compute predictions ####
    io_funcs = BinaryIOCollection()

    test_file_number = len(gen_test_file_list)

    print("generating features on held-out test data...")
    for utt_index in range(test_file_number):
        gen_test_file_name = gen_test_file_list[utt_index]
        test_id = os.path.splitext(os.path.basename(gen_test_file_name))[0]
        temp_test_x = test_x[test_id]
        num_of_rows = temp_test_x.shape[0]

        if stateful:
            temp_test_x = data_utils.get_stateful_input(temp_test_x, self.seq_length, self.batch_size)
        elif sequential_training:
            temp_test_x = np.reshape(temp_test_x, (1, num_of_rows, self.n_in))

        predictions = self.model.predict(temp_test_x)
        if sequential_training:
            predictions = np.reshape(predictions, (num_of_rows, self.n_out))

        data_utils.denorm_data(predictions, out_scaler)

        io_funcs.array_to_binary_file(predictions, gen_test_file_name)
        data_utils.drawProgressBar(utt_index + 1, test_file_number)

    sys.stdout.write("\n")
def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list), max(train_fnum_list), self.bucket_range))

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)

    train_file_number = len(train_x)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list > min_seq_length) & (train_fnum_list <= max_seq_length)]
            if len(sub_train_list) == 0:
                continue

            train_idx_list = sum([train_flen['framenum2utt'][framenum] for framenum in sub_train_list], [])

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_bucket_model_without_padding(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    """This function is not used as of now."""
    ### Method 4 ###
    train_count_list = list(train_flen['framenum2utt'].keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_count_list)

    train_file_number = len(train_x)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for sequence_length in train_count_list:
            train_idx_list = train_flen['framenum2utt'][sequence_length]

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=sequence_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=sequence_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen):
    # TODO: use packaged params
    ### Method 2 ###
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list), max(train_fnum_list) + 1, self.bucket_range))

    if self.shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)

    train_file_number = len(train_x)

    for epoch_num in range(self.num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, self.num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list >= min_seq_length) & (train_fnum_list < max_seq_length)]
            if len(sub_train_list) == 0:
                continue

            train_idx_list = sum([train_flen['framenum2utt'][framenum] for framenum in sub_train_list], [])

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=self.batch_size, shuffle=False, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
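# --- Illustrative sketch, not part of the original class ---
# The bucket trainers above step through the range of frame counts in increments of
# self.bucket_range and, for each window [min_seq_length, max_seq_length), gather the
# utterance ids whose lengths fall inside it via train_flen['framenum2utt']. The toy
# helper below reproduces just that grouping step with hypothetical names.
def _bucket_utterances_example(framenum2utt, bucket_range):
    """Group utterance ids into buckets of width bucket_range, keyed by each bucket's upper bound."""
    frame_counts = sorted(framenum2utt.keys())
    buckets = {}
    for lower in range(frame_counts[0], frame_counts[-1] + 1, bucket_range):
        upper = lower + bucket_range
        utts = [u for n in frame_counts if lower <= n < upper for u in framenum2utt[n]]
        if utts:
            buckets[upper] = utts
    return buckets

# Example: _bucket_utterances_example({95: ['a'], 180: ['b', 'c'], 210: ['d']}, 100)
# returns {195: ['a', 'b', 'c'], 295: ['d']}, so each bucket only needs to be padded
# up to its own upper bound rather than the global maximum length.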
def train_bucket_model_with_padding(self, train_x, train_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 2 ###
    train_fnum_list = np.array(list(train_flen['framenum2utt'].keys()))
    train_range_list = list(range(min(train_fnum_list), max(train_fnum_list), self.bucket_range))

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_range_list)

    train_file_number = len(train_x)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for frame_num in train_range_list:
            min_seq_length = frame_num
            max_seq_length = frame_num + self.bucket_range
            sub_train_list = train_fnum_list[(train_fnum_list > min_seq_length) & (train_fnum_list <= max_seq_length)]
            if len(sub_train_list) == 0:
                continue

            train_idx_list = sum([train_flen['framenum2utt'][framenum] for framenum in sub_train_list], [])

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, max_length=max_seq_length)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, max_length=max_seq_length)

            self.model.fit(temp_train_x, temp_train_y, batch_size=batch_size, epochs=1, verbose=0)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def synth_wav(self, bin_file, gen_test_file_list, gen_wav_file_list):
    #### synthesize audio files ####
    test_file_number = len(gen_test_file_list)

    for utt_index in range(test_file_number):
        gen_feat_file = gen_test_file_list[utt_index]
        gen_wav_file = gen_wav_file_list[utt_index]

        cmd = "%s %s %s" % (bin_file, gen_feat_file, gen_wav_file)
        os.system(cmd)

        data_utils.drawProgressBar(utt_index + 1, test_file_number)

    sys.stdout.write("\n")
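# --- Illustrative sketch, not part of the original class ---
# synth_wav above shells out through os.system, which ignores the binary's exit status.
# A subprocess-based variant (assuming the vocoder binary takes the same two positional
# arguments) raises if the command fails; the helper name below is hypothetical.
def _synth_one_wav_example(bin_file, gen_feat_file, gen_wav_file):
    """Run the external vocoder binary on a single feature file and raise on a non-zero exit."""
    import subprocess
    subprocess.run([bin_file, gen_feat_file, gen_wav_file], check=True)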
def train_recurrent_model_batchsize_one(self, train_x, train_y, valid_x, valid_y, num_of_epochs, shuffle_data):
    ### if batch size is equal to 1 ###
    train_idx_list = list(train_x.keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_idx_list)

    train_file_number = len(train_idx_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for file_name in train_idx_list:
            temp_train_x = train_x[file_name]
            temp_train_y = train_y[file_name]

            temp_train_x = np.reshape(temp_train_x, (1, temp_train_x.shape[0], self.n_in))
            temp_train_y = np.reshape(temp_train_y, (1, temp_train_y.shape[0], self.n_out))

            self.model.train_on_batch(temp_train_x, temp_train_y)
            #self.model.fit(temp_train_x, temp_train_y, epochs=1, shuffle=False, verbose=0)

            file_num += 1
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_batchsize_one_model(self, train_x, train_y, num_of_epochs=10, shuffle_data=True):
    ### train each sentence as a batch ###
    train_idx_list = list(train_x.keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_idx_list)

    train_file_number = len(train_idx_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for file_name in train_idx_list:
            temp_train_x = train_x[file_name]
            temp_train_y = train_y[file_name]

            temp_train_x = np.reshape(temp_train_x, (1, temp_train_x.shape[0], self.n_in))
            temp_train_y = np.reshape(temp_train_y, (1, temp_train_y.shape[0], self.n_out))

            self.model.train_on_batch(temp_train_x, temp_train_y)
            #self.model.fit(temp_train_x, temp_train_y, epochs=1, shuffle=False, verbose=0)

            file_num += 1
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_batchsize_one_model(self, train_x, train_y, num_of_epochs=10, shuffle_data=True):
    ### train each sentence as a batch ###
    train_idx_list = list(train_x.keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_idx_list)

    train_file_number = len(train_idx_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        for file_name in train_idx_list:
            temp_train_x = train_x[file_name]
            temp_train_y = train_y[file_name]

            temp_train_x = np.reshape(temp_train_x, (1, temp_train_x.shape[0], self.n_in))
            temp_train_y = np.reshape(temp_train_y, (1, temp_train_y.shape[0], self.n_out))

            self.model.train_on_batch(temp_train_x, temp_train_y)
            #self.model.fit(temp_train_x, temp_train_y, epochs=1, shuffle=False, verbose=0)

            file_num += 1
            data_utils.drawProgressBar(file_num, train_file_number)

        sys.stdout.write("\n")
def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data):
    ### Method 3 ###
    train_id_list = list(train_flen['utt2framenum'].keys())

    if shuffle_data:
        random.seed(271638)
        random.shuffle(train_id_list)

    train_file_number = len(train_id_list)

    for epoch_num in range(num_of_epochs):
        print('Epoch: %d/%d ' % (epoch_num + 1, num_of_epochs))
        file_num = 0
        while file_num < train_file_number:
            train_idx_list = train_id_list[file_num: file_num + batch_size]

            sub_train_x = dict((filename, train_x[filename]) for filename in train_idx_list)
            sub_train_y = dict((filename, train_y[filename]) for filename in train_idx_list)

            temp_train_x = data_utils.transform_data_to_3d_matrix(sub_train_x, seq_length=self.seq_length, merge_size=self.merge_size)
            temp_train_y = data_utils.transform_data_to_3d_matrix(sub_train_y, seq_length=self.seq_length, merge_size=self.merge_size)

            self.model.train_on_batch(temp_train_x, temp_train_y)

            file_num += len(train_idx_list)
            data_utils.drawProgressBar(file_num, train_file_number)

        print(" Validation error: %.3f" % (self.get_validation_error(valid_x, valid_y)))
def train_shared_model(self, train_x, train_y, valid_x, valid_y):
    # writer = tf.summary.FileWriter(self.tensorboard_dir)
    tb_callback_dict = {speaker: None for speaker in self.speaker_id}
    for model in self.models:
        # Set up tensorboard
        directory = self.tensorboard_dir + '_' + model.name
        tb_callback_dict[model.name] = callbacks.TensorBoard(log_dir=directory, histogram_freq=0, write_graph=True, batch_size=self.batch_size, update_freq='epoch')
        tb_callback_dict[model.name].set_model(model)

    # Need to randomize batches
    train_id_list = list(train_x.keys())
    valid_id_list = list(valid_x.keys())
    if self.shuffle_data:
        random.seed(271638)
        random.shuffle(train_id_list)

    train_file_number = len(train_x)
    valid_file_number = len(valid_x)

    training_loss = {speaker: [] for speaker in self.speaker_id}
    validation_loss = {speaker: [] for speaker in self.speaker_id}
    min_valid_loss = [0, float('inf')]

    for epoch_num in range(self.num_of_epochs):
        print('\nEpoch: %d/%d ' % (epoch_num + 1, self.num_of_epochs))
        batch_training_loss = {speaker: [] for speaker in self.speaker_id}
        batch_validation_loss = {speaker: [] for speaker in self.speaker_id}
        # batch_count = {speaker: 0 for speaker in self.speaker_id}

        for i in range(train_file_number):
            key = train_id_list[i]
            x = train_x[key].reshape(1, train_x[key].shape[0], self.inp_dim)
            y = train_y[key].reshape(1, train_y[key].shape[0], self.out_dim)

            # Identify which output to use
            ind = np.where([spk in key for spk in self.speaker_id])[0][0]
            batch_speaker = self.speaker_id[ind]

            # Run train on batch
            for model in self.models:
                if model.name == batch_speaker:
                    batch_training_loss[batch_speaker].append(model.train_on_batch(x, y))

            data_utils.drawProgressBar(i, train_file_number - 1)

        # Average training loss per epoch
        for speaker in batch_training_loss.keys():
            training_loss[speaker].append(np.mean(batch_training_loss[speaker]))
            print('\nTraining loss %s: %.3f' % (speaker, training_loss[speaker][-1]))

        # For each epoch, run validation set
        for i in range(valid_file_number):
            key = valid_id_list[i]
            x = valid_x[key].reshape(1, valid_x[key].shape[0], self.inp_dim)
            y = valid_y[key].reshape(1, valid_y[key].shape[0], self.out_dim)

            # Identify which output to use
            ind = np.where([spk in key for spk in self.speaker_id])[0][0]
            batch_speaker = self.speaker_id[ind]

            # Run test on batch
            for model in self.models:
                if model.name == batch_speaker:
                    batch_validation_loss[batch_speaker].append(model.test_on_batch(x, y))

            data_utils.drawProgressBar(i, valid_file_number - 1)

        # Average validation loss per epoch
        for speaker in batch_training_loss.keys():
            validation_loss[speaker].append(np.mean(batch_validation_loss[speaker]))
            print('\nValidation loss %s: %.3f' % (speaker, validation_loss[speaker][-1]))

        # Update tensorboard object
        for speaker in batch_training_loss.keys():
            tb_callback_dict[speaker].on_epoch_end(epoch_num, logs={'loss': training_loss[speaker][-1], 'valid_loss': validation_loss[speaker][-1]})

            # Update min loss
            if min_valid_loss[1] > validation_loss[speaker][-1]:
                min_valid_loss[0] = epoch_num
                min_valid_loss[1] = validation_loss[speaker][-1]

            # Check if early stop - Not sure best way to implement for multiple models
            # if epoch_num - min_valid_loss[0] > self.stopping_patience:
            #     print('Stopping early')
            #     tb_callback_dict[speaker].on_train_end(None)
            #     break

    # Signal end of training to tensorboard
    for speaker in tb_callback_dict.keys():
        tb_callback_dict[speaker].on_train_end(None)
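# --- Illustrative sketch, not part of the original class ---
# train_shared_model routes each utterance to the model of the speaker whose id appears in
# the utterance key, via np.where([spk in key for spk in self.speaker_id])[0][0]. A
# plain-Python equivalent of that lookup (hypothetical helper name) is shown below.
def _speaker_for_utterance_example(utt_key, speaker_ids):
    """Return the first speaker id that occurs in the utterance key, or None if there is no match."""
    for spk in speaker_ids:
        if spk in utt_key:
            return spk
    return None

# Example: _speaker_for_utterance_example('awb_arctic_a0001', ['awb', 'slt']) returns 'awb',
# so the batch would be trained against the 'awb' model only.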