def main():
    """Fetch one batch via ``random_batch`` and print the first sample's dimensions.

    The catalog is read from ``c.DATASET_DIR`` and the batch size comes from
    ``c.BATCH_SIZE``. The first three shape entries of sample 0 are printed,
    one per line.
    """
    catalog = data_catalog(c.DATASET_DIR)
    inputs, _ = random_batch(catalog, c.BATCH_SIZE)
    first_sample = inputs[0]
    # Index each axis explicitly so a sample with fewer than three axes
    # still fails with IndexError, as before.
    for axis in range(3):
        print(first_sample.shape[axis])
def main():
    """Print a preview of the catalog and its speaker ids, then read dataset item 0.

    Catalog layout (from the original note):

                                                    filename  speaker_id
        0  audio/LibriSpeechSamples/train-clean-100-npy/1-100-0001.npy  1
        1  audio/LibriSpeechSamples/train-clean-100-npy/1-100-0002.npy  1
    """
    catalog = data_catalog()
    speaker_ids = catalog['speaker_id'].unique()  # original note: 251 speakers
    print(catalog.head())
    print(speaker_ids)

    dataset = stochastic_mini_batch(catalog)
    # Indexing goes through the dataset's __getitem__; the result is discarded,
    # so this only checks that an item can be produced.
    dataset[0]
def get_test(guess_dir):
    """Load the first catalogued feature file of ``guess_dir`` as a one-sample batch.

    Side effect: sets the module-level ``num_neg`` to 141 once the catalog has
    been read.

    Returns:
        ``np.ndarray`` with a leading axis of length 1 holding the clipped
        features of the first file.

    Raises:
        IndexError: if the catalog contains no files.
    """
    global num_neg
    catalog = data_catalog(guess_dir)
    num_neg = 141

    first_file = catalog['filename'].values[0]
    features = np.load(first_file)
    return np.array([clipped_audio(features)])
def main():
    """Embed a reference database, store it, then score a test set against it.

    Steps:
      1. Load the model from ``args.checkpoint_path``.
      2. Evaluate every ``*.npy`` file under ``args.db_folder`` and save the
         resulting embeddings and labels to ``emb.npy`` / ``emb_label.npy``.
      3. Insert each (label, embedding) pair into a ``Database(20000)``.
      4. Evaluate ``args.testdata`` the same way and hand it to ``score``.
    """
    args = arg_parse()
    model = load_model(args.checkpoint_path)
    print('model loaded... %s' % args.checkpoint_path)

    # --- reference database -------------------------------------------------
    db_catalog = data_catalog(args.db_folder, pattern='*.npy')
    print('database loaded... %s' % args.db_folder)
    # NOTE(review): `eval` is presumably the project's evaluation helper
    # shadowing the builtin; confirm against the file's imports.
    db_labels, db_embeddings = eval(model, db_catalog)
    print('database data evaluated... ')
    np.save('emb', db_embeddings)
    np.save('emb_label', db_labels)
    print('database data saved... ')

    database = Database(20000)
    for row in range(len(db_labels)):
        database.insert(db_labels[row], db_embeddings[row])
    print("inserting database completed")

    # --- test data ----------------------------------------------------------
    test_catalog = data_catalog(args.testdata, pattern='*.npy')
    print('test data loaded...%s' % args.testdata)
    test_labels, test_embeddings = eval(model, test_catalog)
    print('test data evaluated')
    score(database, test_labels, test_embeddings)
def test_recognifition(model, test_dir=c.MY_TEST_DIR, newwavfile=None):
    """Compare a new recording against one enrolled utterance per known speaker.

    One random utterance is drawn for every speaker found in ``test_dir``.
    The recording in ``newwavfile`` is embedded with ``model`` and compared to
    each enrolled embedding; the best match is reported on stdout if its
    similarity exceeds ``c.My_Famliy_Threshold``.

    Args:
        model: object exposing ``predict_on_batch``.
        test_dir: directory catalogued with ``data_catalog``.
        newwavfile: path handed to ``read_audio``.
            NOTE(review): the default ``None`` is passed to ``read_audio``
            unchanged; confirm whether that helper accepts it.

    Raises:
        KeyError: if a catalogued speaker id is not in the hard-coded
            id-to-name table below.
    """
    libri = data_catalog(test_dir)
    unique_speakers = list(libri['speaker_id'].unique())
    family_number = len(unique_speakers)

    # Speaker id -> display name; position i in `member` follows the order of
    # `unique_speakers`, which is also the row order of the enrolled batch.
    family = {"19": "pengchong", "27": "zhaonan", "26": "tianxu"}
    member = {i: family[speaker] for i, speaker in enumerate(unique_speakers)}

    # One randomly chosen enrolment utterance per speaker.
    anchor_rows = [
        libri[libri['speaker_id'] == speaker].sample(n=1, replace=False)
        for speaker in unique_speakers
    ]
    family_anchor = pd.concat(anchor_rows, axis=0)

    # Original note: shape is 3*160*64*1 for three speakers.
    x = np.array([
        clipped_audio(np.load(filename))
        for filename in family_anchor['filename'].values
    ])
    # Original note: shape is 3*512 for three speakers.
    embedding = model.predict_on_batch(x).copy()

    raw_audio = read_audio(newwavfile)
    feature = extract_features(raw_audio, target_sample_rate=c.SAMPLE_RATE)
    newmember = clipped_audio(feature).reshape(1, 160, 64, 1)
    newembedding = model.predict_on_batch(newmember)
    # Repeat the probe once per enrolled speaker. The count used to be a
    # hard-coded 3, which only lined up with exactly three speakers.
    newembedding = np.tile(newembedding, (family_number, 1))

    sim = np.array(batch_cosine_similarity(embedding, newembedding))
    index = int(np.argmax(sim))
    if sim[index] > c.My_Famliy_Threshold:
        print("this is one of our famliy member.")
        print("----------********you are : {}!*******-----------".format(
            member[index]))
    else:
        print(
            "you are not one of my famliy member.Please try again or try anotherone"
        )
def create_test_data(test_dir, check_partial):
    """Build an evaluation batch of (anchor, positive, negatives) groups.

    For each selected speaker with at least two utterances, two of them are
    sampled as anchor and positive, and ``num_neg`` utterances of other
    speakers are sampled as negatives. Speakers with fewer than two utterances
    are skipped.

    Args:
        test_dir: directory catalogued with ``data_catalog``.
        check_partial: when true, sets the module-level ``num_neg`` to 49 and
            uses at most 30 speakers; otherwise the current module-level
            ``num_neg`` and all speakers are used.

    Returns:
        ``(x, y)``: ``x`` stacks the clipped features of every selected file in
        group order; ``y`` repeats ``[1, 0, ..., 0]`` (one 1 followed by
        ``num_neg`` zeros) once per group that was actually built.

    Raises:
        ValueError: if no speaker had two utterances, or (from pandas) if
            fewer than ``num_neg`` negative utterances are available.
    """
    global num_neg
    libri = data_catalog(test_dir)

    unique_speakers = list(libri['speaker_id'].unique())
    np.random.shuffle(unique_speakers)

    num_triplets = len(unique_speakers)
    if check_partial:
        num_neg = 49
        num_triplets = min(num_triplets, 30)

    frames = []
    built_triplets = 0
    for speaker in unique_speakers[:num_triplets]:
        same_speaker = libri[libri['speaker_id'] == speaker]
        if len(same_speaker) < 2:
            continue  # cannot form an anchor/positive pair

        pair = same_speaker.sample(n=2, replace=False)

        # 1 anchor, 1 positive, num_neg negatives per group.
        anchor_df = pair.iloc[0:1].copy()
        anchor_df['training_type'] = 'anchor'

        positive_df = pair.iloc[1:2].copy()
        positive_df['training_type'] = 'positive'

        negative_df = libri[libri['speaker_id'] != speaker].sample(
            n=num_neg, replace=False).copy()
        negative_df['training_type'] = 'negative'

        frames.extend([anchor_df, positive_df, negative_df])
        built_triplets += 1

    if not frames:
        raise ValueError(
            'No speaker in {} has at least two utterances; cannot build test '
            'data.'.format(test_dir))

    # Concatenate once instead of growing the frame row by row.
    test_batch = pd.concat(frames, axis=0)

    # Original note: (batchsize, num_frames, 64, 1)
    x = np.array([
        clipped_audio(np.load(filename))
        for filename in test_batch['filename'].values
    ])

    # 1 positive, num_neg negatives; repeated only for the groups that exist,
    # so skipped speakers no longer add labels that have no rows in x.
    new_y = np.hstack(([1], np.zeros(num_neg)))
    y = np.tile(new_y, built_triplets)
    return x, y
def main(libri_dir=c.DATASET_DIR):
    """Train ``DeepSpeakerModel`` with a triplet loss for 100 epochs on the GPU.

    Each batch from the loader yields anchor/positive/negative tensors plus
    their labels (the labels are not used). After every epoch the model's
    ``state_dict`` is written to ``checkpoint_<epoch>.pt``.

    Catalog layout (from the original note):

                                                    filename  speaker_id
        0  audio/LibriSpeechSamples/train-clean-100-npy/1-100-0001.npy  1
        1  audio/LibriSpeechSamples/train-clean-100-npy/1-100-0002.npy  1

    Args:
        libri_dir: directory catalogued with ``data_catalog``.
    """
    print('Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)

    # Neither of the next two values is used further down; kept as in the
    # original. Original note: 251 speakers.
    unique_speakers = libri['speaker_id'].unique()
    transform = transforms.Compose([transforms.ToTensor()])

    train_set = stochastic_mini_batch(libri)
    train_loader = DataLoader(train_set, batch_size=c.BATCH_SIZE, shuffle=True)

    model = DeepSpeakerModel(embedding_size=c.EMBEDDING_SIZE,
                             num_classes=c.NUM_SPEAKERS)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0)
    model.cuda()
    summary(model, input_size=(1, 160, 64))

    for epoch in range(100):
        model.train()
        for batch_idx, batch in tqdm(enumerate(train_loader)):
            data_a, data_p, data_n, label_a, label_p, label_n = batch

            # float32 -> GPU -> Variable, for each member of the triplet.
            anchor, positive, negative = [
                Variable(tensor.type(torch.FloatTensor).cuda())
                for tensor in (data_a, data_p, data_n)
            ]

            out_a, out_p, out_n = model(anchor), model(positive), model(negative)
            triplet_loss = TripletMarginLoss(0.2).forward(out_a, out_p, out_n)

            # compute gradient and update weights
            optimizer.zero_grad()
            triplet_loss.backward()
            optimizer.step()
            print('selected_triplet_loss', triplet_loss.data)

        print("epoch:", epoch)
        torch.save(model.state_dict(), "checkpoint_{}.pt".format(epoch))
batch = np.concatenate([np.array(anchor_batch), np.array(positive_batch), np.array(negative_batch)], axis=0) labs = anchor_labs + positive_labs + negative_labs print("select best batch time {0:.3}s".format(time() - orig_time)) return batch, np.array(labs) if __name__ == '__main__': model = convolutional_model() model.compile(optimizer='adam', loss=deep_speaker_loss) last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER) if last_checkpoint is not None: print('Found checkpoint [{}]. Resume from here...'.format(last_checkpoint)) model.load_weights(last_checkpoint) grad_steps = int(last_checkpoint.split('_')[-2]) print('[DONE]') libri = data_catalog(c.DATASET_DIR) unique_speakers = libri['speaker_id'].unique() labels = libri['speaker_id'].values files = libri['filename'].values spk_utt_dict = {} for i in range(len(unique_speakers)): spk_utt_dict[unique_speakers[i]] = [] for i in range(len(labels)): spk_utt_dict[labels[i]].append(files[i]) create_data_producer(unique_speakers,spk_utt_dict) for i in range(100): x, y = best_batch(model) print(x.shape) #print(y)
def main():
    """Build one stochastic mini-batch from ``c.DATASET_DIR`` and print its input/target shapes."""
    catalog = data_catalog(c.DATASET_DIR)
    inputs, targets = stochastic_mini_batch(catalog, c.BATCH_SIZE).to_inputs()
    print(inputs.shape, targets.shape)
def main(libri_dir=c.DATASET_DIR):
    """Train the convolutional speaker model (and optionally a GRU model) on triplets.

    Workflow:
      1. Catalogue the ``.npy`` features in ``libri_dir``; if none are found,
         run ``preprocess_and_save`` once and retry, exiting with status 1 if
         the catalog is still empty.
      2. Derive the network input shape from one stochastic mini-batch.
      3. Build the model(s) and resume from the newest checkpoint, if any
         (pre-training checkpoint when ``c.PRE_TRAIN`` is set).
      4. Loop forever: pick a batch with ``select_batch.best_batch``, train on
         it, append the loss to the log files, evaluate periodically, and save
         checkpoints every ``c.SAVE_PER_EPOCHS`` steps.

    Args:
        libri_dir: directory catalogued with ``data_catalog``.
    """

    def list_best_checkpoints():
        """Return the ``.h5`` files in the best-checkpoint folder, sorted in
        descending order of the text between the last two dots of the file
        name (the fractional digits of the score written by the save calls)."""
        folder = c.BEST_CHECKPOINT_FOLDER
        paths = (os.path.join(folder, name) for name in os.listdir(folder))
        return sorted(
            (p for p in paths if os.path.isfile(p) and p.endswith(".h5")),
            key=lambda p: p.split('/')[-1].split('.')[-2],
            reverse=True)

    PRE_TRAIN = c.PRE_TRAIN
    logging.info(
        'Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)

    if len(libri) == 0:
        logging.warning(
            'Cannot find npy files, we will load audio, extract features and save it as npy file'
        )
        logging.warning('Waiting for preprocess...')
        preprocess_and_save(c.WAV_DIR, c.DATASET_DIR)
        libri = data_catalog(libri_dir)
        if len(libri) == 0:
            logging.warning(
                'Have you converted flac files to wav? If not, run audio/convert_flac_2_wav.sh'
            )
            exit(1)

    unique_speakers = libri['speaker_id'].unique()
    spk_utt_dict, unique_speakers = create_dict(libri['filename'].values,
                                                libri['speaker_id'].values,
                                                unique_speakers)
    select_batch.create_data_producer(unique_speakers, spk_utt_dict)

    # One throw-away batch, used only to discover the per-sample shape.
    batch = stochastic_mini_batch(libri,
                                  batch_size=c.BATCH_SIZE,
                                  unique_speakers=unique_speakers)
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    x, y = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    train_batch_size = batch_size
    input_shape = (num_frames, b.shape[1], b.shape[2])

    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info('input shape: {}'.format(input_shape))
    logging.info('x.shape : {}'.format(x.shape))

    orig_time = time()
    model = convolutional_model(input_shape=input_shape,
                                batch_size=batch_size,
                                num_frames=num_frames)
    logging.info(model.summary())
    gru_model = None
    if c.COMBINE_MODEL:
        gru_model = recurrent_model(input_shape=input_shape,
                                    batch_size=batch_size,
                                    num_frames=num_frames)
        logging.info(gru_model.summary())

    grad_steps = 0
    if PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info(
                'Found pre-training checkpoint [{}]. Resume from here...'.
                format(last_checkpoint))
            # The pre-training weights include a softmax head, so they are
            # loaded through a wrapper built on top of `model`.
            # NOTE(review): presumably this fills the layers shared with
            # `model` - confirm.
            x = model.output
            x = Dense(len(unique_speakers),
                      activation='softmax',
                      name='softmax_layer')(x)
            pre_model = Model(model.input, x)
            pre_model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('Successfully loaded pre-training model')
    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found checkpoint [{}]. Resume from here...'.format(
                last_checkpoint))
            model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('[DONE]')
        if c.COMBINE_MODEL:
            last_checkpoint = get_last_checkpoint_if_any(
                c.GRU_CHECKPOINT_FOLDER)
            if last_checkpoint is not None:
                logging.info(
                    'Found checkpoint [{}]. Resume from here...'.format(
                        last_checkpoint))
                gru_model.load_weights(last_checkpoint)
                logging.info('[DONE]')

    # adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer='adam', loss=deep_speaker_loss)
    if c.COMBINE_MODEL:
        gru_model.compile(optimizer='adam', loss=deep_speaker_loss)
    print("model_build_time", time() - orig_time)

    logging.info('Starting training...')
    lasteer = 10
    eer = 1
    # The best-checkpoint folder is listed before anything is written to it,
    # so make sure it exists.
    os.makedirs(c.BEST_CHECKPOINT_FOLDER, exist_ok=True)

    while True:
        orig_time = time()
        x, _ = select_batch.best_batch(model, batch_size=c.BATCH_SIZE)
        print("select_batch_time:", time() - orig_time)
        # Random targets of the right length.
        # NOTE(review): presumably deep_speaker_loss ignores y_true - confirm.
        y = np.random.uniform(size=(x.shape[0], 1))

        logging.info('== Presenting step #{0}'.format(grad_steps))
        orig_time = time()
        loss = model.train_on_batch(x, y)
        logging.info(
            '== Processed in {0:.2f}s by the network, training loss = {1}.'.
            format(time() - orig_time, loss))

        if c.COMBINE_MODEL:
            loss1 = gru_model.train_on_batch(x, y)
            logging.info(
                '== Processed in {0:.2f}s by the gru-network, training loss = {1}.'
                .format(time() - orig_time, loss1))
            with open(c.GRU_CHECKPOINT_FOLDER + '/losses_gru.txt', "a") as f:
                f.write("{0},{1}\n".format(grad_steps, loss1))

        # record training loss
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        if grad_steps % 10 == 0:
            fm1, _, acc1, eer1, _, _, auc1 = eval_model(
                model,
                train_batch_size,
                test_dir=c.DATASET_DIR,
                check_partial=True,
                gru_model=gru_model)
            # AUC is the fourth format argument (index 3); index 2 repeated
            # the accuracy.
            logging.info(
                'test training data EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f}, AUC = {3:.3f} '
                .format(eer1, fm1, acc1, auc1))
            with open(c.CHECKPOINT_FOLDER + '/train_acc_eer.txt', "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer1, fm1,
                                                   acc1))

        if grad_steps % c.TEST_PER_EPOCHS == 0:
            # `eer` from this evaluation drives the best-checkpoint decision
            # below.
            fm, _, acc, eer, _, _, auc = eval_model(
                model,
                train_batch_size,
                test_dir=c.TEST_DIR,
                gru_model=gru_model)
            logging.info(
                '== Testing model after batch #{0}'.format(grad_steps))
            logging.info(
                'EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f}, AUC = {3:.3f} '
                .format(eer, fm, acc, auc))
            with open(c.TEST_LOG, "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer, fm, acc))

        # checkpoints are really heavy so let's just keep the last one.
        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.CHECKPOINT_FOLDER, grad_steps, loss))
            if c.COMBINE_MODEL:
                gru_model.save_weights('{0}/grumodel_{1}_{2:.5f}.h5'.format(
                    c.GRU_CHECKPOINT_FOLDER, grad_steps, loss1))

            if eer < lasteer:
                files = list_best_checkpoints()
                lasteer = eer
                # Everything except the last four entries of the sorted list
                # is deleted.
                for file in files[:-4]:
                    logging.info("removing old model: {}".format(file))
                    os.remove(file)
                model.save_weights(
                    c.BEST_CHECKPOINT_FOLDER +
                    '/best_model{0}_{1:.5f}.h5'.format(grad_steps, eer))
                if c.COMBINE_MODEL:
                    files = list_best_checkpoints()
                    lasteer = eer
                    for file in files[:-4]:
                        logging.info("removing old model: {}".format(file))
                        os.remove(file)
                    gru_model.save_weights(c.BEST_CHECKPOINT_FOLDER +
                                           '/best_gru_model{0}_{1:.5f}.h5'.
                                           format(grad_steps, eer))

        grad_steps += 1
def main():
    """Pre-train the convolutional model as a softmax speaker classifier.

    The catalog from ``c.DATASET_DIR`` is split into train and test data. A
    dense softmax layer sized to the number of speakers is placed on top of
    ``convolutional_model`` and trained for 100000 steps with categorical
    cross-entropy. Train/test loss and accuracy are appended to text files in
    ``c.PRE_CHECKPOINT_FOLDER``, and weights are saved every
    ``c.SAVE_PER_EPOCHS`` steps.
    """
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    # train_path = "/Users/walle/PycharmProjects/Speech/coding/deep-speaker-master/audio/LibriSpeechSamples/train-clean-100"
    train_path = c.DATASET_DIR
    libri = data_catalog(train_path)
    files = list(libri['filename'])
    labels1 = list(libri['speaker_id'])

    # Map speaker labels to consecutive class indices and back.
    labels_to_id = {}
    id_to_labels = {}
    for class_index, label in enumerate(np.unique(labels1)):
        labels_to_id[label] = class_index
        id_to_labels[class_index] = label
    no_of_speakers = len(np.unique(labels1))

    train_data, test_data = split_data(files, labels1, batch_size)
    batchloader = batchTrainingImageLoader(train_data,
                                           labels_to_id,
                                           no_of_speakers,
                                           batch_size=batch_size)
    testloader = batchTestImageLoader(test_data,
                                      labels_to_id,
                                      no_of_speakers,
                                      batch_size=batch_size)
    test_steps = int(len(test_data) / batch_size)

    # The first test batch also supplies the input shape for the model.
    x_test, y_test = next(testloader)
    num_frames = x_test[0].shape[0]
    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info("x_shape:{0}, y_shape:{1}".format(x_test.shape, y_test.shape))

    base_model = convolutional_model(input_shape=x_test.shape[1:],
                                     batch_size=batch_size,
                                     num_frames=num_frames)
    head = Dense(no_of_speakers, activation='softmax',
                 name='softmax_layer')(base_model.output)
    model = Model(base_model.input, head)
    logging.info(model.summary())
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print("printing format per batch:", model.metrics_names)
    # y_ = np.argmax(y_train, axis=0)
    # class_weights = sklearn.utils.class_weight.compute_class_weight('balanced', np.unique(y_), y_)

    grad_steps = 0
    last_checkpoint = utils.get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
    # The lookup result is overwritten straight away, so the resume branch
    # below never executes.
    last_checkpoint = None
    if last_checkpoint is not None:
        logging.info('Found checkpoint [{}]. Resume from here...'.format(
            last_checkpoint))
        model.load_weights(last_checkpoint)
        grad_steps = int(last_checkpoint.split('_')[-2])
        logging.info('[DONE]')

    orig_time = time()
    Num_Iter = 100000
    for current_iter in range(1, Num_Iter + 1):
        orig_time = time()
        x_train, y_train = next(batchloader)
        loss, acc = model.train_on_batch(x_train, y_train)  # returns [loss, acc]
        logging.info(
            'Train Steps:{0}, Time:{1:.2f}s, Loss={2}, Accuracy={3}'.format(
                grad_steps,
                time() - orig_time, loss, acc))
        with open(c.PRE_CHECKPOINT_FOLDER + "/train_loss_acc.txt", "a") as f:
            f.write("{0},{1},{2}\n".format(grad_steps, loss, acc))

        if grad_steps % c.TEST_PER_EPOCHS == 0:
            losses = []
            accs = []
            for _ in range(test_steps):
                loss, acc = model.test_on_batch(x_test, y_test)
                x_test, y_test = next(testloader)
                losses.append(loss)
                accs.append(acc)
            # From here on `loss`/`acc` hold the test averages, which is also
            # what the checkpoint file name below uses on test steps.
            loss = np.mean(np.array(losses))
            acc = np.mean(np.array(accs))
            print("loss", loss, "acc", acc)
            logging.info(
                'Test the Data ---------- Steps:{0}, Loss={1}, Accuracy={2}, '.
                format(grad_steps, loss, acc))
            with open(c.PRE_CHECKPOINT_FOLDER + "/test_loss_acc.txt",
                      "a") as f:
                f.write("{0},{1},{2}\n".format(grad_steps, loss, acc))

        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            utils.create_dir_and_delete_content(c.PRE_CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.PRE_CHECKPOINT_FOLDER, grad_steps, loss))

        grad_steps += 1
def main(libri_dir=c.DATASET_DIR):
    """Train the speaker model(s) up to step 2000 and record per-step training time.

    Workflow:
      1. Catalogue the ``.npy`` features in ``libri_dir``; if none are found,
         run ``preprocess_and_save`` once and retry, exiting with status 1 if
         the catalog is still empty.
      2. Derive the network input shape from one stochastic mini-batch.
      3. Build the model(s) and resume from the newest checkpoint, if any
         (pre-training checkpoint when ``c.PRE_TRAIN`` is set).
      4. While ``grad_steps < 2001``: pick a batch with
         ``select_batch.best_batch``, train on it, log the loss, evaluate
         periodically and save checkpoints every ``c.SAVE_PER_EPOCHS`` steps.
      5. Write the collected timings to a text file in the working directory.

    Args:
        libri_dir: directory catalogued with ``data_catalog``.
    """

    def list_best_checkpoints():
        """Return the ``.h5`` files in the best-checkpoint folder, sorted in
        descending order of the text between the last two dots of the file
        name (the fractional digits of the score written by the save calls)."""
        folder = c.BEST_CHECKPOINT_FOLDER
        paths = (os.path.join(folder, name) for name in os.listdir(folder))
        return sorted(
            (p for p in paths if os.path.isfile(p) and p.endswith(".h5")),
            key=lambda p: p.split('/')[-1].split('.')[-2],
            reverse=True)

    PRE_TRAIN = c.PRE_TRAIN
    logging.info('Looking for fbank features [.npy] files in {}.'.format(libri_dir))
    libri = data_catalog(libri_dir)

    if len(libri) == 0:
        logging.warning('Cannot find npy files, we will load audio, extract features and save it as npy file')
        logging.warning('Waiting for preprocess...')
        preprocess_and_save(c.WAV_DIR, c.DATASET_DIR)
        libri = data_catalog(libri_dir)
        if len(libri) == 0:
            logging.warning('Have you converted flac files to wav? If not, run audio/convert_flac_2_wav.sh')
            exit(1)

    unique_speakers = libri['speaker_id'].unique()
    spk_utt_dict, unique_speakers = create_dict(libri['filename'].values, libri['speaker_id'].values, unique_speakers)
    select_batch.create_data_producer(unique_speakers, spk_utt_dict)

    # One throw-away batch, used only to discover the per-sample shape.
    batch = stochastic_mini_batch(libri, batch_size=c.BATCH_SIZE, unique_speakers=unique_speakers)
    batch_size = c.BATCH_SIZE * c.TRIPLET_PER_BATCH
    x, y = batch.to_inputs()
    b = x[0]
    num_frames = b.shape[0]
    train_batch_size = batch_size
    input_shape = (num_frames, b.shape[1], b.shape[2])

    logging.info('num_frames = {}'.format(num_frames))
    logging.info('batch size: {}'.format(batch_size))
    logging.info('input shape: {}'.format(input_shape))
    # Original note: x.shape should in principle be (batchsize, num_frames, 64, 1).
    logging.info('x.shape : {}'.format(x.shape))

    orig_time = time()
    model = convolutional_model(input_shape=input_shape, batch_size=batch_size, num_frames=num_frames)
    logging.info(model.summary())
    gru_model = None
    if c.COMBINE_MODEL:
        gru_model = recurrent_model(input_shape=input_shape, batch_size=batch_size, num_frames=num_frames)
        logging.info(gru_model.summary())

    grad_steps = 0
    if PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found pre-training checkpoint [{}]. Resume from here...'.format(last_checkpoint))
            # The pre-training weights include a softmax head, so they are
            # loaded through a wrapper built on top of `model`.
            # NOTE(review): presumably this fills the layers shared with
            # `model` - confirm.
            x = model.output
            x = Dense(len(unique_speakers), activation='softmax', name='softmax_layer')(x)
            pre_model = Model(model.input, x)
            pre_model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('Successfully loaded pre-training model')
    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            logging.info('Found checkpoint [{}]. Resume from here...'.format(last_checkpoint))
            model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            logging.info('[DONE]')
        if c.COMBINE_MODEL:
            last_checkpoint = get_last_checkpoint_if_any(c.GRU_CHECKPOINT_FOLDER)
            if last_checkpoint is not None:
                logging.info('Found checkpoint [{}]. Resume from here...'.format(last_checkpoint))
                gru_model.load_weights(last_checkpoint)
                logging.info('[DONE]')

    # adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer='adam', loss=deep_speaker_loss)
    if c.COMBINE_MODEL:
        gru_model.compile(optimizer='adam', loss=deep_speaker_loss)
    print("model_build_time", time() - orig_time)

    logging.info('Starting training...')
    lasteer = 10
    eer = 1

    train_times = []  # one "begin_trainEnd_stepEnd_duration" record per step
    total_times = 0   # sum of the pure train_on_batch durations

    # Original note: 10 epochs of 200 batches each (replaces `while True`).
    os.makedirs(c.BEST_CHECKPOINT_FOLDER, exist_ok=True)
    while grad_steps < 2001:
        orig_time = time()
        x, _ = select_batch.best_batch(model, batch_size=c.BATCH_SIZE)
        print("select_batch_time:", time() - orig_time)
        # Random targets of the right length.
        # NOTE(review): presumably deep_speaker_loss ignores y_true - confirm.
        y = np.random.uniform(size=(x.shape[0], 1))
        # If "ValueError: Error when checking target: expected ln to have shape
        # (None, 512) but got array with shape (96, 1)" is raised, use the
        # following line instead:
        # y = np.random.uniform(size=(x.shape[0], 512))

        logging.info('== Presenting step #{0}'.format(grad_steps))
        orig_time = time()

        # Time only the training call itself.
        begin_time = time()
        loss = model.train_on_batch(x, y)
        train_end_time = time()

        logging.info('== Processed in {0:.2f}s by the network, training loss = {1}.'.format(time() - orig_time, loss))

        if c.COMBINE_MODEL:
            loss1 = gru_model.train_on_batch(x, y)
            logging.info(
                '== Processed in {0:.2f}s by the gru-network, training loss = {1}.'.format(time() - orig_time, loss1))
            with open(c.GRU_CHECKPOINT_FOLDER + '/losses_gru.txt', "a") as f:
                f.write("{0},{1}\n".format(grad_steps, loss1))

        # record training loss
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        if grad_steps % 10 == 0:
            fm1, _, acc1, eer1 = eval_model(model, train_batch_size, test_dir=c.DATASET_DIR,
                                            check_partial=True, gru_model=gru_model)
            logging.info('test training data EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f} '.format(eer1, fm1, acc1))
            with open(c.CHECKPOINT_FOLDER + '/train_acc_eer.txt', "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer1, fm1, acc1))

        if grad_steps % c.TEST_PER_EPOCHS == 0:
            # `eer` from this evaluation drives the best-checkpoint decision
            # below.
            fm, _, acc, eer = eval_model(model, train_batch_size, test_dir=c.TEST_DIR, gru_model=gru_model)
            logging.info('== Testing model after batch #{0}'.format(grad_steps))
            logging.info('EER = {0:.3f}, F-measure = {1:.3f}, Accuracy = {2:.3f} '.format(eer, fm, acc))
            with open(c.TEST_LOG, "a") as f:
                f.write("{0},{1},{2},{3}\n".format(grad_steps, eer, fm, acc))

        # checkpoints are really heavy so let's just keep the last one.
        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(c.CHECKPOINT_FOLDER, grad_steps, loss))
            if c.COMBINE_MODEL:
                gru_model.save_weights('{0}/grumodel_{1}_{2:.5f}.h5'.format(c.GRU_CHECKPOINT_FOLDER, grad_steps, loss1))

            if eer < lasteer:
                files = list_best_checkpoints()
                lasteer = eer
                # Everything except the last four entries of the sorted list
                # is deleted.
                for file in files[:-4]:
                    logging.info("removing old model: {}".format(file))
                    os.remove(file)
                model.save_weights(c.BEST_CHECKPOINT_FOLDER + '/best_model{0}_{1:.5f}.h5'.format(grad_steps, eer))
                if c.COMBINE_MODEL:
                    files = list_best_checkpoints()
                    lasteer = eer
                    for file in files[:-4]:
                        logging.info("removing old model: {}".format(file))
                        os.remove(file)
                    gru_model.save_weights(c.BEST_CHECKPOINT_FOLDER + '/best_gru_model{0}_{1:.5f}.h5'.format(grad_steps, eer))

        grad_steps += 1

        end_time = time()
        step_duration = train_end_time - begin_time
        total_times += step_duration
        train_times.append(str(begin_time) + '_' + str(train_end_time) + '_' + str(end_time) + '_' + str(step_duration))
        print("步数:{},耗时:{}s".format(grad_steps, str(step_duration)))

    # Write the timings to a file. The average is taken over the steps that
    # were actually timed in this run: dividing by grad_steps would
    # under-report it after resuming from a checkpoint.
    timed_steps = len(train_times)
    average_time = total_times / timed_steps if timed_steps else 0.0
    with open('DeepSpeaker_epoch10_spk{}_suttperspk{}_迭代耗时.txt'.format(str(c.Spk_num), str(c.UttPerSpk)), mode='w', encoding='utf-8') as wf:
        wf.write("步数{}_平均每次训练耗时:{}\n".format(grad_steps, average_time))
        wf.write("开始训练时间_结束训练时间_结束步数训练时间(包括验证读写文件等)_耗时(结束训练时间-开始训练时间)\n")
        for line in train_times:
            wf.write(line + '\n')
def main():
    """Train the convolutional speaker model on batches from ``random_batch``.

    Workflow:
      1. Catalogue ``c.DATASET_DIR`` and register the speaker/utterance table
         with ``select_batch.create_data_producer``.
      2. Build the model and load the newest checkpoint, if any. With
         ``c.PRE_TRAIN`` set, the pre-training weights are loaded and the step
         counter starts at 0; otherwise the step counter is taken from the
         checkpoint file name.
      3. Loop forever: train on one batch, append the loss to ``c.LOSS_LOG``,
         and every ``c.SAVE_PER_EPOCHS`` steps save a checkpoint plus, when the
         loss improved, a copy in ``c.BEST_CHECKPOINT_FOLDER``.
    """

    def list_best_checkpoints():
        """Return the ``.h5`` files in the best-checkpoint folder, sorted in
        descending order of the text between the last two dots of the file
        name (the fractional digits of the loss written by the save call)."""
        folder = c.BEST_CHECKPOINT_FOLDER
        paths = (os.path.join(folder, name) for name in os.listdir(folder))
        return sorted(
            (p for p in paths if os.path.isfile(p) and p.endswith(".h5")),
            key=lambda p: p.split('/')[-1].split('.')[-2],
            reverse=True)

    PRE_TRAIN = c.PRE_TRAIN
    print('Looking for fbank features [.npy] files in {}.'.format(
        c.DATASET_DIR))
    libri = data_catalog(c.DATASET_DIR)

    unique_speakers = libri['speaker'].unique()
    speaker_utterance_dict, unique_speakers = create_dict(
        libri['filename'].values, libri['speaker'].values, unique_speakers)
    select_batch.create_data_producer(unique_speakers, speaker_utterance_dict)

    orig_time = time()
    model = convolutional_model(input_shape=c.INPUT_SHAPE,
                                batch_size=c.BATCH_SIZE,
                                num_frames=c.NUM_FRAMES)
    print(model.summary())

    grad_steps = 0
    if PRE_TRAIN:
        last_checkpoint = get_last_checkpoint_if_any(c.PRE_CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            print('Found pre-training checkpoint [{}]. Resume from here...'.
                  format(last_checkpoint))
            # The pre-training weights include a softmax head, so they are
            # loaded through a wrapper built on top of `model`.
            # NOTE(review): presumably this fills the layers shared with
            # `model` - confirm.
            x = model.output
            x = Dense(len(unique_speakers),
                      activation='softmax',
                      name='softmax_layer')(x)
            pre_model = Model(model.input, x)
            pre_model.load_weights(last_checkpoint)
            # The step counter deliberately stays at 0 here: the value parsed
            # from the file name used to be overwritten with 0 immediately.
            grad_steps = 0
            print('Successfully loaded pre-training model')
    else:
        last_checkpoint = get_last_checkpoint_if_any(c.CHECKPOINT_FOLDER)
        if last_checkpoint is not None:
            print('Found checkpoint [{}]. Resume from here...'.format(
                last_checkpoint))
            model.load_weights(last_checkpoint)
            grad_steps = int(last_checkpoint.split('_')[-2])
            print('[DONE]')

    # The model is compiled with the optimizer name; the explicitly configured
    # Adam instance that used to be built here was never passed to compile.
    model.compile(optimizer='adam', loss=deep_speaker_loss)
    print("model_build_time", time() - orig_time)

    print('Starting training...')
    last_loss = 10
    # The best-checkpoint folder is listed before anything is written to it,
    # so make sure it exists.
    os.makedirs(c.BEST_CHECKPOINT_FOLDER, exist_ok=True)

    while True:
        # x, _ = select_batch.best_batch(model, batch_size=c.BATCH_SIZE)
        # y = np.random.uniform(size=(x.shape[0], 1))
        x, y = random_batch(libri, c.BATCH_SIZE)

        print('== Presenting step #{0}'.format(grad_steps))
        orig_time = time()  # times the training call only, not batch creation
        loss = model.train_on_batch(x, y)
        print('== Processed in {0:.2f}s by the network, training loss = {1}.'.
              format(time() - orig_time, loss))

        # record training loss
        with open(c.LOSS_LOG, "a") as f:
            f.write("{0},{1}\n".format(grad_steps, loss))

        # checkpoints are really heavy so let's just keep the last one.
        if grad_steps % c.SAVE_PER_EPOCHS == 0:
            create_dir_and_delete_content(c.CHECKPOINT_FOLDER)
            model.save_weights('{0}/model_{1}_{2:.5f}.h5'.format(
                c.CHECKPOINT_FOLDER, grad_steps, loss))

            if loss < last_loss:
                files = list_best_checkpoints()
                last_loss = loss
                # Everything except the last four entries of the sorted list
                # is deleted.
                for file in files[:-4]:
                    print("removing old model: {}".format(file))
                    os.remove(file)
                model.save_weights(
                    c.BEST_CHECKPOINT_FOLDER +
                    '/best_model{0}_{1:.5f}.h5'.format(grad_steps, loss))

        grad_steps += 1