def to_train(self):
    # Vector of candidate angles, from -90 to 90 degrees in steps of 45,
    # converted to radians.
    angles_vector = np.arange(-90, 90 + 45, 45) * math.pi / 180.0
    # Buffer holding the examples that are created while the GPU is busy.
    tf_data_buffer = []
    # Observer that reports which of the training files have already been
    # consumed (deleted) by the training process.
    file_observer = fo.FileObserver(files=self.training_tf_records_files)
    # Loop forever, recreating training files that have been deleted.
    while True:
        # Fill the buffer while its size limit is not exceeded.
        while len(tf_data_buffer) < self.training_buffer_size_limit:
            # List where the examples of one tfrecords file are collected.
            tf_data = []
            # Iterate over the number of speakers contained in one training file.
            for speaker in tqdm(range(self.n_speakers_in_training_file)):
                # Randomly choose how many speakers the mixture will have.
                this_n_speakers_in_audio = np.random.randint(
                    self.min_mixed_speakers_in_training_file,
                    self.max_mixed_speakers_in_training_file)
                # Randomly select the data of that many training speakers
                # from the database.
                speakers_data = self.get_random_speakers_data(
                    self.ids_speakers_in_train, this_n_speakers_in_audio)
                # Each speaker can have one or more samples, so one is chosen
                # at random per speaker; this is repeated as many times as
                # the user defines.
                for n in range(self.n_repeated_speaker_in_training_file):
                    # Audio signal of each speaker.
                    audio_signals = np.zeros(
                        (this_n_speakers_in_audio, self.window_length))
                    # Energy of each speaker's signal.
                    speakers_audio_energies = np.zeros(
                        this_n_speakers_in_audio)
                    # Take a random audio segment from each speaker.
                    for x, speaker_data in enumerate(speakers_data):
                        # Audio metadata: VAD start and audio path.
                        random_audio_data, VAD_random_start, audio_path = \
                            self.get_random_speaker_audio(
                                speaker_data, self.audio_folder_path,
                                self.window_length)
                        # Load the audio signal.
                        raw_signal, sr = sf.read(audio_path)
                        # Cut the segment indicated by the VAD.
                        audio_signals[x] = raw_signal[
                            VAD_random_start:VAD_random_start +
                            self.window_length]
                        # Compute the RMS energy of the segment.
                        speakers_audio_energies[x] = np.sqrt(
                            np.square(audio_signals[x]).sum() /
                            float(len(audio_signals[x])))
                    # Scale factors derived from the highest energy in the list.
                    noise_scale = np.max(
                        speakers_audio_energies) / speakers_audio_energies
                    # Normalize all signals according to the noise scale.
                    for h in range(this_n_speakers_in_audio):
                        audio_signals[h] = self.normalize_signal(
                            audio_signals[h] * noise_scale[h])
                    # Randomly assign a DOA to each speaker (random.sample
                    # needs a plain sequence, so the array is converted to a
                    # list).
                    doas = random.sample(list(angles_vector),
                                         this_n_speakers_in_audio)
                    # The first angle is the source of interest (SOI).
                    doa_steer = doas[0]
                    # Matrix of size [microphones + 2, window length]; the
                    # last two rows keep the SOI and the interference as seen
                    # at microphone 0 (they receive the same shift).
                    X = np.zeros([self.M + 2, self.window_length])
                    # Mix all signals; this simulates microphone 0.
                    mix_signal = np.sum(
                        audio_signals[0:, :],
                        axis=0) / float(this_n_speakers_in_audio)
                    # Delayed mixture that simulates microphone 1.
                    signal_delay = np.zeros(self.window_length)
                    for audio_source, doa in zip(audio_signals, doas):
                        signal_delay += delay_f(
                            audio_source,
                            (self.beamFormer.d / self.beamFormer.c) *
                            math.sin(doa), self.sample_rate)
                    # Normalize the delayed mixture.
                    signal_delay = signal_delay / float(
                        this_n_speakers_in_audio)
                    X[0, :] = mix_signal           # microphone 0
                    X[1, :] = signal_delay         # microphone 1
                    X[2, :] = audio_signals[0, :]  # SOI at microphone 0
                    # Interference sources at microphone 0.
                    X[3, :] = np.sum(audio_signals[1:, :], axis=0) / float(
                        this_n_speakers_in_audio - 1)
                    beamformer_result = self.beamFormer.phase_mask(
                        X=X,
                        doa_steer=doa_steer,
                        phase_diff_threshold=self.phase_diff_threshold,
                        N=self.window_length,
                        nframes=self.nframes,
                        fs=self.sample_rate)
                    beamformer_result = beamformer_result[:, self.nframes *
                                                          4:self.nframes * -3]
                    # beamformer_result[0, :] SOI estimated by the beamformer
                    # beamformer_result[1, :] interference estimated by the beamformer
                    # beamformer_result[2, :] audio at microphone 0
                    # beamformer_result[3, :] SOI (clean)
                    # beamformer_result[4, :] interference (clean)
                    # Compute the STFT of every resulting signal.
                    _, _, Z_s1 = signal.stft(beamformer_result[3, :],
                                             fs=self.sample_rate,
                                             nperseg=self.nperseg,
                                             noverlap=self.noverlap)
                    _, _, Z_s2 = signal.stft(beamformer_result[4, :],
                                             fs=self.sample_rate,
                                             nperseg=self.nperseg,
                                             noverlap=self.noverlap)
                    _, _, Z_X_0 = signal.stft(beamformer_result[2, :],
                                              fs=self.sample_rate,
                                              nperseg=self.nperseg,
                                              noverlap=self.noverlap)
                    _, _, Z_DOA = signal.stft(beamformer_result[0, :],
                                              fs=self.sample_rate,
                                              nperseg=self.nperseg,
                                              noverlap=self.noverlap)
                    _, _, Z_interf = signal.stft(beamformer_result[1, :],
                                                 fs=self.sample_rate,
                                                 nperseg=self.nperseg,
                                                 noverlap=self.noverlap)
                    # Convert the magnitudes to decibels.
                    db_mag_s1 = to_dB_mag(np.abs(Z_s1), self.MIN_AMP,
                                          self.AMP_FAC)
                    db_mag_s2 = to_dB_mag(np.abs(Z_s2), self.MIN_AMP,
                                          self.AMP_FAC)
                    db_mag_X_0 = to_dB_mag(np.abs(Z_X_0), self.MIN_AMP,
                                           self.AMP_FAC)
                    db_mag_DOA = to_dB_mag(np.abs(Z_DOA), self.MIN_AMP,
                                           self.AMP_FAC)
                    db_mag_interf = to_dB_mag(np.abs(Z_interf), self.MIN_AMP,
                                              self.AMP_FAC)
                    # Build the VAD frequency mask.
                    max_mag = np.max(db_mag_X_0)
                    speech_VAD = (db_mag_X_0 >
                                  (max_mag - self.THRESHOLD)).astype(int)
                    # Build the masks from the beamformer outputs.
                    Y_beamformer = np.array([
                        db_mag_DOA > db_mag_interf, db_mag_DOA < db_mag_interf
                    ]).astype(int)
                    # Normalize the data.
                    n_db_mag_X_0 = (db_mag_X_0 - db_mag_X_0.mean()) / float(
                        db_mag_X_0.std())
                    # SOI and interference estimated with the beamformer mask.
                    n_db_mag_source = n_db_mag_X_0 * Y_beamformer[0]
                    n_db_mag_interf = n_db_mag_X_0 * Y_beamformer[1]
                    # Compute the IBM (ideal binary mask).
                    Y = np.array([db_mag_s1 > db_mag_s2,
                                  db_mag_s1 < db_mag_s2]).astype(int)
                    # _, source_recovered = signal.istft(Y[0] * Z_X_0, fs=self.sample_rate, nperseg=NPERSEG, noverlap=NOVERLAP)
                    # _, inter_recovered = signal.istft(Y[1] * Z_X_0, fs=self.sample_rate, nperseg=NPERSEG, noverlap=NOVERLAP)
                    Y = np.transpose(Y, [1, 2, 0])
                    # Debug dumps (disabled):
                    # sf.write('PhaseMask_original_ref.wav', o_[3, :], self.sample_rate)
                    # sf.write('PhaseMask_original_inter.wav', o_[4, :], self.sample_rate)
                    # sf.write('PhaseMask_beamformer_ref.wav', o_[0, :], self.sample_rate)
                    # sf.write('PhaseMask_beamformer_inter.wav', o_[1, :], self.sample_rate)
                    # sf.write('PhaseMask_beamformer_m0.wav', o_[2, :], self.sample_rate)
                    # sf.write('PhaseMask_mask_ref.wav', source_recovered, self.sample_rate)
                    # sf.write('PhaseMask_mask_inter.wav', inter_recovered, self.sample_rate)
                    # exit(0)
                    complex_X_0 = np.array([Z_X_0.real, Z_X_0.imag])
                    complex_X_0 = np.transpose(complex_X_0, [1, 2, 0])
                    data = {
                        'n_db_mag_X_0':
                        self.tf_converter.bytes_feature(
                            n_db_mag_X_0.astype(np.float32).tobytes()),
                        'n_db_mag_ref':
                        self.tf_converter.bytes_feature(
                            n_db_mag_source.astype(np.float32).tobytes()),
                        'n_db_mag_interf':
                        self.tf_converter.bytes_feature(
                            n_db_mag_interf.astype(np.float32).tobytes()),
                        'complex_X_0':
                        self.tf_converter.bytes_feature(
                            complex_X_0.astype(np.float32).tobytes()),
                        'MASK':
                        self.tf_converter.bytes_feature(
                            Y.astype(np.uint8).tobytes()),
                        'VAD':
                        self.tf_converter.bytes_feature(
                            speech_VAD.astype(np.uint8).tobytes())
                    }
                    tf_data.append(data)
            # If the buffer has data and a file is missing, write it now.
            if len(tf_data_buffer) > 0 and not file_observer.exist_all_files():
                print("lvl 2")
                # The name of the missing file is obtained and the file is
                # created from the buffer.
                name_file = file_observer.missing_file()
                self.tf_converter.set_features(
                    tf_data_buffer.pop()).convert_to_tf(name_file)
            # Add the newly created data to the buffer.
            tf_data_buffer.append(tf_data)
        # While files are missing, create them from the buffer.
        while not file_observer.exist_all_files():
            if len(tf_data_buffer) > 0:
                # The buffer has data, so the missing file is created.
                print("lvl 1")
                name_file = file_observer.missing_file()
                self.tf_converter.set_features(
                    tf_data_buffer.pop()).convert_to_tf(name_file)
            else:
                # The buffer is empty; leave the loop.
                break
        # The buffer is full: block until one of the files is consumed
        # (deleted), then write a replacement from the buffer.
        file_observer.wait_if_exist_files()
        print("lvl 0")
        name_file = file_observer.missing_file()
        self.tf_converter.set_features(
            tf_data_buffer.pop()).convert_to_tf(name_file)
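
# For reference: two helpers used by `to_train` above are defined elsewhere
# in the repo. The sketches below are assumptions based only on how they are
# called here, not the real implementations.
#
# `to_dB_mag` is assumed to floor the STFT magnitude and convert it to
# decibels, as is common in deep-clustering-style pipelines:
#
#     def to_dB_mag(mag, MIN_AMP, AMP_FAC):
#         mag = np.maximum(mag, np.max(mag) / float(MIN_AMP))
#         return 20.0 * np.log10(mag * AMP_FAC)
#
# `self.tf_converter` is assumed to wrap the usual tf.train.Example /
# TFRecordWriter pattern (the names below are hypothetical):
#
#     with tf.python_io.TFRecordWriter(name_file) as writer:
#         for feature_dict in features_list:
#             example = tf.train.Example(
#                 features=tf.train.Features(feature=feature_dict))
#             writer.write(example.SerializeToString())
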
def run_model(self):
    path = self.tf_records_training_path
    test_path = self.tf_records_test_path
    # Indices of the training tfrecords files; the default is 8 files.
    files = [0, 1, 2, 3, 4, 5, 6, 7]
    tf_records_files = [
        path + '{}.tfrecords'.format(global_step) for global_step in files
    ]
    test_tf_records_files = [test_path + '0.tfrecords']
    if self.network == "BLSTMModel":
        print("Using BLSTMModel")
        model = BLSTMModel(
            num_input=self.num_input,
            timesteps=self.timesteps,
            num_hidden=self.num_hidden,
            layers=self.layers,
            sources=self.sources,
            optimizer=self.opt_params.optimizer,
            learning_rate=self.opt_params.learning_rate,
            batch_size=self.batch_size,
            momentum=self.opt_params.momentum,
            # forget_bias 0.0 when restoring saved weights, 1.0 when
            # training from scratch.
            forget_bias=0.0 if self.load_session == "true" else 1.0)
    elif self.network == "ChimeraNetwork":
        print("Using ChimeraNetwork")
        model = ChimeraNetwork(
            num_input=self.num_input,
            timesteps=self.timesteps,
            num_hidden=self.num_hidden,
            layers=self.layers,
            d_vector=self.d_vector,
            sources=self.sources,
            activation_function=self.activation_function,
            optimizer=self.opt_params.optimizer,
            learning_rate=self.opt_params.learning_rate,
            batch_size=self.batch_size,
            alpha=self.alpha,
            momentum=self.opt_params.momentum,
            forget_bias=0.0 if self.load_session == "true" else 1.0)
    file_observer = FileObserver(tf_records_files)
    tfRecordsParser = TFRecordsParser(self.num_input, self.timesteps,
                                      self.sources)
    tf_records_reader_training = TfRecordsReader(
        tf_records_files=tf_records_files,
        parse_function=tfRecordsParser.parse_function,
        batch_size=self.batch_size)
    tf_records_reader_test = TfRecordsReader(
        tf_records_files=test_tf_records_files,
        parse_function=tfRecordsParser.parse_function,
        batch_size=self.batch_size,
        shuffle=False)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    tf_records_size_training = (self.n_speakers_train *
                                self.n_repeated_speakers_train)
    steps_to_complete_tf_record = tf_records_size_training / float(
        self.batch_size)

    ############## Config paths
    speakers_to_save = 2
    total_samples = speakers_to_save * self.files_per_speaker_test
    if not os.path.exists(self.results_path + self.file_name + '/'):
        os.makedirs(self.results_path + self.file_name + '/')
    if not os.path.exists(self.results_path + self.file_name + '/plot/'):
        os.makedirs(self.results_path + self.file_name + '/plot/')
    if not os.path.exists(self.results_path + self.file_name +
                          '/rec_audios/'):
        os.makedirs(self.results_path + self.file_name + '/rec_audios/')
    for speaker in range(speakers_to_save):
        for global_step in range(self.files_per_speaker_test):
            if not os.path.exists(self.results_path + self.file_name +
                                  '/plot/{}/{}/'.format(speaker, global_step)):
                os.makedirs(self.results_path + self.file_name +
                            '/plot/{}/{}/'.format(speaker, global_step))
    for global_step in range(0, total_samples, self.files_per_speaker_test):
        if not os.path.exists(self.results_path + self.file_name +
                              '/rec_audios/{}/'.format(global_step)):
            os.makedirs(self.results_path + self.file_name +
                        '/rec_audios/{}/'.format(global_step))
    ##############

    with tf.Session() as sess:
        print("V15")
        train_writer = tf.summary.FileWriter(
            self.graphs_path + '{}_train'.format(self.file_name))
        test_writer = tf.summary.FileWriter(
            self.graphs_path + '{}_test'.format(self.file_name))
        train_writer.add_graph(sess.graph)
        sess.run(init_op)
        training_handle = sess.run(
            tf_records_reader_training.iterator.string_handle())
        testing_handle = sess.run(
            tf_records_reader_test.iterator.string_handle())
        saver = tf.train.Saver()
        step = 0
        if self.load_session == "true":
            file = open(self.session_path + '{}.stp'.format(self.file_name),
                        "r")
            step = int(file.read().strip())
            print("STEP:", step)
            file.close()
            # Restore the saved session.
            saver.restore(
                sess, self.session_path +
                '{}/model_weights.ckpt'.format(self.file_name))
            # Total steps: tfrecord file size
            # (n_speakers_train * n_repeated_speakers_train), times 8
            # tfrecord files, epochs and global steps, divided by batch size.
            total_steps = (
                self.n_speakers_train * self.n_repeated_speakers_train
            ) * 8 * self.n_epochs * self.globa_steps / self.batch_size
            # Remaining global steps, derived from the same quantities.
            remaining = int(
                ((total_steps - step) * self.batch_size) /
                ((self.n_speakers_train * self.n_repeated_speakers_train) *
                 8 * self.n_epochs))
            print("Remaining global_steps:", remaining)
            new_step = (self.n_speakers_train *
                        self.n_repeated_speakers_train) * 8 * self.n_epochs * (
                            self.globa_steps - remaining) / self.batch_size
            print("NEW STEP:", new_step)
            step = new_step
            self.globa_steps = remaining
        train_acc_mean = []
        train_loss_mean = []
        for global_step in range(0, self.globa_steps):
            ####################################
            ########### TRAINING ##############
            ####################################
            for epoch in range(0, self.n_epochs):
                file_observer.wait_if_not_exist_files()
                sess.run(tf_records_reader_training.iterator.initializer)
                last_tf_records_file_loaded = 0
                train_acc_mean = []
                train_loss_mean = []
                while True:
                    try:
                        n_db_mag_X_0, n_db_mag_source, n_db_mag_interf, complex_X_0, MASK, VAD = sess.run(
                            tf_records_reader_training.next_element,
                            feed_dict={
                                tf_records_reader_training.handle:
                                training_handle
                            })
                        _, l, acc, lg, bmp, train_summary = sess.run(
                            [
                                model.optimizer, model.loss, model.accuracy,
                                model.y_pred, model.MASK_hat, model.summary
                            ],
                            feed_dict={
                                model.X:
                                np.concatenate(
                                    [n_db_mag_source, n_db_mag_interf],
                                    axis=2),
                                model.Y_true: MASK,
                                model.VAD: VAD,
                                model.X_real: complex_X_0[:, :, :, 0],
                                model.X_imag: complex_X_0[:, :, :, 1],
                                model.n_db_mag_X_0: n_db_mag_X_0
                            })
                        train_acc_mean.append(acc)
                        train_loss_mean.append(l)
                        train_writer.add_summary(train_summary, step)
                        if step % self.display_step == 0 or step == 1:
                            print(lg[0][0])
                            print(bmp.shape)
                            print(
                                np.sum(np.count_nonzero(bmp, axis=1), axis=0))
                            print(
                                'Step {}, globa_steps {} Minibatch Loss:{} Acc:{}'
                                .format(str(step), str(global_step), str(l),
                                        str(acc)))
                        step += 1
                        if step % steps_to_complete_tf_record == 0:
                            if epoch == self.n_epochs - 1:
                                # On the last epoch, delete each consumed
                                # tfrecords file so the generator recreates it.
                                os.remove(path + "{}.tfrecords".format(
                                    last_tf_records_file_loaded))
                            last_tf_records_file_loaded += 1
                    except tf.errors.OutOfRangeError:
                        break
                print('epoch acc mean :{}'.format(
                    str(np.array(train_acc_mean).mean())))
            # Persist the current step and the model weights.
            file = open(self.session_path + '{}.stp'.format(self.file_name),
                        "w")
            file.write(str(step))
            file.close()
            save_path = saver.save(
                sess, self.session_path +
                '{}/model_weights.ckpt'.format(self.file_name))
            ####################################
            ########### TESTING ##############
            ####################################
            sess.run(tf_records_reader_test.iterator.initializer)
            while True:
                try:
                    n_db_mag_X_0, n_db_mag_source, n_db_mag_interf, complex_X_0, MASK, VAD = sess.run(
                        tf_records_reader_test.next_element,
                        feed_dict={
                            tf_records_reader_test.handle: testing_handle
                        })
                    if isinstance(model, ChimeraNetwork):
                        res_hat_, l, acc, test_summary, Z_, Y_ = sess.run(
                            [
                                model.res_hat, model.loss, model.accuracy,
                                model.summary, model.Z_res, model.Y_res
                            ],
                            feed_dict={
                                model.X:
                                np.concatenate(
                                    [n_db_mag_source, n_db_mag_interf],
                                    axis=2),
                                model.Y_true: MASK,
                                model.VAD: VAD,
                                model.X_real: complex_X_0[:, :, :, 0],
                                model.X_imag: complex_X_0[:, :, :, 1],
                                model.n_db_mag_X_0: n_db_mag_X_0
                            })
                    else:
                        res_hat_, l, acc, test_summary = sess.run(
                            [
                                model.res_hat, model.loss, model.accuracy,
                                model.summary
                            ],
                            feed_dict={
                                model.X:
                                np.concatenate(
                                    [n_db_mag_source, n_db_mag_interf],
                                    axis=2),
                                model.Y_true: MASK,
                                model.VAD: VAD,
                                model.X_real: complex_X_0[:, :, :, 0],
                                model.X_imag: complex_X_0[:, :, :, 1],
                                model.n_db_mag_X_0: n_db_mag_X_0
                            })
                    print(
                        'TEST Step {}, globa_steps {} Minibatch Loss:{} Acc:{}'
                        .format(str(step), str(global_step), str(l),
                                str(acc)))
                    test_writer.add_summary(test_summary, step)
                except tf.errors.OutOfRangeError:
                    break
            # Reconstruct and save the audio of two speakers as a sanity check.
            for sample in range(
                    0, speakers_to_save * self.files_per_speaker_test,
                    self.files_per_speaker_test):
                # res_hat_ has shape [batch, timesteps, num_input, sources].
                a_hat = res_hat_[sample:sample +
                                 self.files_per_speaker_test, :, :, 0]
                b_hat = res_hat_[sample:sample +
                                 self.files_per_speaker_test, :, :, 1]
                x_a_hat_list = []
                x_b_hat_list = []
                for j in range(0, self.files_per_speaker_test):
                    _, x_a_hat = signal.istft(a_hat[j],
                                              fs=self.sample_rate,
                                              nperseg=self.stft_nperseg,
                                              noverlap=self.stft_noverlap)
                    _, x_b_hat = signal.istft(b_hat[j],
                                              fs=self.sample_rate,
                                              nperseg=self.stft_nperseg,
                                              noverlap=self.stft_noverlap)
                    x_a_hat_list += x_a_hat.tolist()
                    x_b_hat_list += x_b_hat.tolist()
                sf.write(
                    self.results_path + self.file_name +
                    '/rec_audios/{}/{}_x_a_hat.wav'.format(
                        sample, global_step), x_a_hat_list, self.sample_rate)
                sf.write(
                    self.results_path + self.file_name +
                    '/rec_audios/{}/{}_x_b_hat.wav'.format(
                        sample, global_step), x_b_hat_list, self.sample_rate)
            self.make_eval(model, sess, tf_records_reader_test, step,
                           train_acc_mean, train_loss_mean, testing_handle)
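
# A minimal sketch of what `TfRecordsReader` is assumed to provide, based on
# how it is used in `run_model` above: a feedable-iterator wrapper around
# tf.data.TFRecordDataset. The class name below is hypothetical; the real
# reader is imported from elsewhere in the repo.
class _TfRecordsReaderSketch(object):
    def __init__(self, tf_records_files, parse_function, batch_size,
                 shuffle=True):
        dataset = tf.data.TFRecordDataset(tf_records_files)
        dataset = dataset.map(parse_function)
        if shuffle:
            dataset = dataset.shuffle(buffer_size=1000)
        dataset = dataset.batch(batch_size)
        # An initializable iterator gives `run_model` its `.initializer` and
        # `.string_handle()`; the string-handle placeholder lets a single
        # `next_element` op switch between the training and test datasets.
        self.iterator = dataset.make_initializable_iterator()
        self.handle = tf.placeholder(tf.string, shape=[])
        feedable_iterator = tf.data.Iterator.from_string_handle(
            self.handle, dataset.output_types, dataset.output_shapes)
        self.next_element = feedable_iterator.get_next()
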
class FileObserverTest(unittest.TestCase, FileTestHelper):
    def setUp(self):
        self.mn = MockNotifier()
        self.fo = FileObserver(self.mn)
        self.fn1 = self.get_name()
        self.fn2 = self.get_name()
        self.log(self.fn1)
        self.log(self.fn1)  # fn1/fn2 have messages starting from 3:
        self.log(self.fn2)  # the message counter is shared across files.
        self.fm1 = FileMonitor(self.fn1)
        self.fm2 = FileMonitor(self.fn2)

    def tearDown(self):
        os.unlink(self.fn1)
        os.unlink(self.fn2)

    def testEmpty(self):
        self.fo.register(self.fm1, True)
        self.fo.register(self.fm2, False)
        self.fo.alarm()
        self.assertEqual(self.mn.notifications, [])

    def testOneLine(self):
        self.fo.register(self.fm1)
        self.log(self.fn1)
        self.fo.alarm()
        self.assertEqual(self.mn.notifications,
                         [('logline', self.fn1,
                           msg_template % (4, self.fn1), False)])

    def testTwoLines(self):
        self.fo.register(self.fm1, True)
        self.fo.register(self.fm2, False)
        self.log(self.fn1)
        self.log(self.fn2)
        self.fo.alarm()
        self.assertEqual(self.mn.notifications,
                         [('logline', self.fn1,
                           msg_template % (4, self.fn1), True),
                          ('logline', self.fn2,
                           msg_template % (5, self.fn2), False)])

    def testFalseAlarm(self):
        self.fo.register(self.fm1)
        self.fo.alarm()
        self.log(self.fn1)
        self.assertEqual(self.mn.notifications, [])
        self.fo.alarm()
        self.assertEqual(self.mn.notifications,
                         [('logline', self.fn1,
                           msg_template % (4, self.fn1), False)])

    def testThreeLines(self):
        self.fo.register(self.fm1)
        self.fo.register(self.fm2)
        self.log(self.fn1)
        self.log(self.fn2)
        self.log(self.fn1)
        self.fo.alarm()
        # Notification order is not guaranteed, so compare as sets (the
        # built-in `set` replaces the long-gone Python 2 `sets.Set`).
        self.assertEqual(
            set(self.mn.notifications),
            set([('logline', self.fn1, msg_template % (4, self.fn1), False),
                 ('logline', self.fn2, msg_template % (5, self.fn2), False),
                 ('logline', self.fn1, msg_template % (6, self.fn1), False)]))
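
# A minimal sketch of the notifier interface the tests above depend on: the
# tests only require a `.notifications` list that records each event tuple.
# The class name and the `notify` method signature are assumptions; the real
# MockNotifier lives in the test utilities.
class _MockNotifierSketch(object):
    def __init__(self):
        self.notifications = []

    def notify(self, event, filename, message, focused):
        # FileObserver is assumed to report each new log line as a tuple
        # like ('logline', path, formatted_message, focus_flag).
        self.notifications.append((event, filename, message, focused))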