def load_dataset(self, train_out_file, save_h5, test=False):
    import h5py

    self.logger.debug('Load dataset')

    if save_h5:
        self.logger.info('Saving datatable')
        (src_datatable, src_masks, src_seq_len,
         trg_datatable, trg_masks, trg_seq_len,
         train_src_speakers_max, train_src_speakers_min,
         train_trg_speakers_max, train_trg_speakers_min
         ) = self.s2s_datatable.seq2seq_save_datatable()
        self.logger.info('DONE - Saved datatable')
    else:
        self.logger.info('Loading parameters')
        (src_datatable, src_masks, src_seq_len,
         trg_datatable, trg_masks, trg_seq_len,
         train_src_speakers_max, train_src_speakers_min,
         train_trg_speakers_max, train_trg_speakers_min
         ) = self.s2s_datatable.seq2seq_load_datatable()
        self.logger.info('DONE - Loaded parameters')

    if test:
        # In test mode, override the speaker statistics with the ones
        # computed on the training set, stored as HDF5 file attributes.
        # The 'with' block closes the file automatically, so no explicit
        # file.close() is needed.
        with h5py.File(train_out_file + '.h5', 'r') as file:
            train_src_speakers_max = file.attrs.get('src_speakers_max')
            train_src_speakers_min = file.attrs.get('src_speakers_min')
            train_trg_speakers_max = file.attrs.get('trg_speakers_max')
            train_trg_speakers_min = file.attrs.get('trg_speakers_min')

    # Number of speakers on each side, inferred from the statistics matrices
    train_src_speakers = train_src_speakers_max.shape[0]
    train_trg_speakers = train_trg_speakers_max.shape[0]

    # Normalize data
    self.logger.debug('Normalize data')

    # Iterate over sequence 'slices'. Both datatables must hold the same
    # number of sequences; only the first 42 parameters are scaled.
    assert src_datatable.shape[0] == trg_datatable.shape[0]

    for i in range(src_datatable.shape[0]):
        (src_datatable[i, :, 0:42],
         trg_datatable[i, :, 0:42]) = maxmin_scaling(
            src_datatable[i, :, :],
            src_masks[i, :],
            trg_datatable[i, :, :],
            trg_masks[i, :],
            train_src_speakers_max,
            train_src_speakers_min,
            train_trg_speakers_max,
            train_trg_speakers_min
        )

    return (src_datatable, src_seq_len,
            trg_datatable, trg_masks, trg_seq_len,
            train_src_speakers, train_src_speakers_max,
            train_src_speakers_min,
            train_trg_speakers, train_trg_speakers_max,
            train_trg_speakers_min,
            self.s2s_datatable.max_seq_length)
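# NOTE: maxmin_scaling is used above but defined elsewhere in the repo.
# The helper below is a minimal sketch of its core idea, assuming plain
# per-feature max-min normalization of a (frames, params) sequence into
# [0, 1]. The project's real version also handles masks and per-speaker
# statistics; this name and signature are hypothetical.
import numpy as np

def maxmin_scaling_sketch(sequence, max_vec, min_vec, eps=1e-12):
    """Scale each feature column of 'sequence' into [0, 1].

    sequence : (num_frames, num_params) array
    max_vec, min_vec : (num_params,) per-feature extrema from training data
    """
    span = np.maximum(max_vec - min_vec, eps)  # avoid division by zero
    return (sequence - min_vec) / span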
def load_preprocessed(self, save_h5=False):
    if save_h5:
        # Build the dataset from the raw files and cache it as HDF5
        d = np.empty((0, self.seq_length, self.parameters_length))

        for file_name in self.files_list:
            print(file_name)
            fil, mini, maxi = self.load_file(file_name)
            d = np.concatenate((d, fil))

        self.data = d

        # Scale every sequence with the accumulated max/min matrices
        for i, sequence in enumerate(self.data):
            self.data[i] = maxmin_scaling(sequence, self.max_mat, self.min_mat)

        with h5py.File(self.dataset_file, 'w') as file:
            file.create_dataset('dataset', data=self.data,
                                compression='gzip', compression_opts=9)
    else:
        # Load the cached dataset. The real extrema are not recomputed in
        # this branch, so return sentinel values instead.
        with h5py.File(self.dataset_file, 'r') as file:
            self.data = file['dataset'][:]

        maxi = -1e+20 * np.ones(self.parameters_length)
        mini = 1e+20 * np.ones(self.parameters_length)

    # Hold out roughly 5% of the sequences for validation, but always at
    # least one full batch
    valid_index = int(np.floor(self.data.shape[0] * 0.05))
    if valid_index < self.batch_size:
        valid_index = self.batch_size + 1

    self.valid_data = self.data[-valid_index:]
    self.data = self.data[:-valid_index]

    self.num_batches = int(np.floor(self.data.shape[0] / self.batch_size))

    return mini, maxi
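# The test branch of load_dataset above reads 'src_speakers_max' and the
# related statistics as HDF5 root attributes. A minimal sketch of the
# writer side, assuming the extrema are attached to the training output
# file (the function name and array shapes here are assumptions):
import h5py

def save_speaker_stats(path, src_max, src_min, trg_max, trg_min):
    # Store per-speaker extrema as file attributes so normalization at
    # test time can reuse the training statistics
    with h5py.File(path, 'a') as f:
        f.attrs.create('src_speakers_max', src_max)
        f.attrs.create('src_speakers_min', src_min)
        f.attrs.create('trg_speakers_max', trg_max)
        f.attrs.create('trg_speakers_min', trg_min)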
                   str(epochs) + '_lr_' + str(learning_rate) +
                   '_weights.h5')

##############################
# Predict sequences in batch #
##############################
# Pre-allocate prediction results
predictions = np.zeros((nb_sequences,
                        trg_test_datatable.shape[1],
                        trg_test_datatable.shape[2]))

for i in range(nb_sequences):
    # Normalize sequence (keep only the scaled source slice)
    src_test_datatable[i, :, 0:42] = s2s_norm.maxmin_scaling(
        src_test_datatable[i, :, :],
        src_test_masks[i, :],
        trg_test_datatable[i, :, :],
        trg_test_masks[i, :],
        train_speakers_max,
        train_speakers_min
    )[0]

    # Mask sequence
    masked_sequence = s2s_norm.mask_data(
        src_test_datatable[i, :, :],
        src_test_masks[i, :]
    )

    # Get only valid data (drop padding frames, keep the feature dimension)
    valid_sequence = masked_sequence[~masked_sequence.mask].reshape(
        (1, -1, masked_sequence.shape[1])
    )
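# s2s_norm.mask_data is defined elsewhere in the repo; the reshape trick
# above relies on it returning a numpy masked array whose mask marks
# padding frames. A minimal sketch of that behavior, assuming a 1-D frame
# mask where 1 means 'valid frame' (the helper name and mask convention
# are assumptions):
import numpy as np
import numpy.ma as ma

def mask_data_sketch(sequence, frame_mask):
    # Broadcast the per-frame mask over the feature axis; entries where
    # frame_mask == 0 (padding) become masked
    mask_2d = np.repeat((frame_mask == 0)[:, np.newaxis],
                        sequence.shape[1], axis=1)
    return ma.masked_array(sequence, mask=mask_2d)

# With that convention, masked[~masked.mask] flattens only the valid
# entries, and reshape((1, -1, num_params)) restores (batch, time, params).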
print('done')

##################
# Normalize data #
##################
# Iterate over sequence 'slices'
assert src_train_datatable.shape[0] == trg_train_datatable.shape[0]

for i in range(src_train_datatable.shape[0]):
    (
        src_train_datatable[i, :, 0:42],
        trg_train_datatable[i, :, 0:42]
    ) = maxmin_scaling(
        src_train_datatable[i, :, :],
        src_train_masks[i, :],
        trg_train_datatable[i, :, :],
        trg_train_masks[i, :],
        train_speakers_max,
        train_speakers_min
    )

################
# Define Model #
################
print('Initializing model')
model = Sequential()

# Encoder Layer
model.add(GRU(100,
              input_dim=data_dim,
              return_sequences=False
              ))
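# The script above cuts off right after the encoder layer. A minimal
# sketch of how such a Keras 1.x-style encoder-decoder commonly
# continues, with hypothetical layer sizes and placeholder dimensions
# (this is an assumption, not the project's actual architecture):
from keras.models import Sequential
from keras.layers import GRU, RepeatVector, TimeDistributed, Dense

data_dim_sketch = 44      # placeholder input feature size (assumption)
output_dim_sketch = 44    # placeholder output feature size (assumption)
max_seq_len_sketch = 300  # placeholder maximum sequence length (assumption)

sketch = Sequential()
sketch.add(GRU(100, input_dim=data_dim_sketch,
               return_sequences=False))                # encoder: sequence -> state
sketch.add(RepeatVector(max_seq_len_sketch))           # repeat state per decoder step
sketch.add(GRU(100, return_sequences=True))            # decoder: state -> sequence
sketch.add(TimeDistributed(Dense(output_dim_sketch)))  # per-frame regression output
sketch.compile(loss='mse', optimizer='adam')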