def start_training(working_dir, pre_training_phase=True):
    ensures_dir(CHECKPOINTS_SOFTMAX_DIR)
    ensures_dir(CHECKPOINTS_TRIPLET_DIR)
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    if pre_training_phase:
        logger.info('Softmax pre-training.')
        kc = KerasFormatConverter(working_dir)
        num_speakers_softmax = len(kc.categorical_speakers.speaker_ids)
        dsm = DeepSpeakerModel(batch_input_shape, include_softmax=True,
                               num_speakers_softmax=num_speakers_softmax)
        dsm.m.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
        if pre_training_checkpoint is not None:
            initial_epoch = int(pre_training_checkpoint.split('/')[-1].split('.')[0].split('_')[-1])
            logger.info(f'Initial epoch is {initial_epoch}.')
            logger.info(f'Loading softmax checkpoint: {pre_training_checkpoint}.')
            dsm.m.load_weights(pre_training_checkpoint)  # latest one.
        else:
            initial_epoch = 0
        fit_model_softmax(dsm, kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test, initial_epoch=initial_epoch)
    else:
        logger.info('Training with the triplet loss.')
        dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
        triplet_checkpoint = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
        pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
        if triplet_checkpoint is not None:
            logger.info(f'Loading triplet checkpoint: {triplet_checkpoint}.')
            dsm.m.load_weights(triplet_checkpoint)
        elif pre_training_checkpoint is not None:
            logger.info(f'Loading pre-training checkpoint: {pre_training_checkpoint}.')
            # If `by_name` is True, weights are loaded into layers only if they share the
            # same name. This is useful for fine-tuning or transfer-learning models where
            # some of the layers have changed.
            dsm.m.load_weights(pre_training_checkpoint, by_name=True)
        dsm.m.compile(optimizer=SGD(), loss=deep_speaker_loss)
        fit_model(dsm, working_dir, NUM_FRAMES)
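
# The epoch parsing in `start_training` assumes checkpoint filenames ending in
# `_<epoch>.h5`. A minimal sketch of what `load_best_checkpoint` is assumed to
# do under that convention (the real helper may differ): return the path with
# the highest epoch number, or None when the directory holds no match.
import os
import re


def load_best_checkpoint_sketch(checkpoint_dir):
    if not os.path.isdir(checkpoint_dir):
        return None
    candidates = []
    for name in os.listdir(checkpoint_dir):
        match = re.search(r'_(\d+)\.h5$', name)
        if match is not None:
            candidates.append((int(match.group(1)), os.path.join(checkpoint_dir, name)))
    # Highest epoch wins ("latest one", as the comment above notes).
    return max(candidates)[1] if candidates else None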
def load_best_model(self):
    state_dict = utils.load_best_checkpoint(self.checkpoint_dir)
    if state_dict is None:
        print(f"Could not load best checkpoint. Did not find under: {self.checkpoint_dir}")
        return
    self.model.load_state_dict(state_dict)
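
# A self-contained sketch of the pattern above. Here `utils.load_best_checkpoint`
# is assumed to return a state dict or None; `TinyNet` and the inline loader
# (including the `model_best.pth` filename) are illustrative stand-ins, not
# part of the original code.
import os
import torch
import torch.nn as nn


class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)


def load_best_checkpoint(checkpoint_dir):
    path = os.path.join(checkpoint_dir, 'model_best.pth')
    return torch.load(path) if os.path.exists(path) else None


model = TinyNet()
state_dict = load_best_checkpoint('checkpoints/')
if state_dict is None:
    print('Could not load best checkpoint. Did not find under: checkpoints/')
else:
    model.load_state_dict(state_dict)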
def start_training(working_dir):
    ensures_dir(CHECKPOINTS_MTL_DIR)
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    logger.info('Started training.')
    kc = KerasFormatConverter(working_dir)
    num_speakers_softmax = len(kc.categorical_speakers.speaker_ids)
    logger.info(f'categorical_speakers: {kc.categorical_speakers.speaker_ids}')
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False,
                           num_speakers_softmax=num_speakers_softmax)
    base_model = dsm.m
    x = base_model.output
    x = Dense(1024, name='shared')(x)
    y = Dense(1024, name='speaker_task')(x)
    speaker_out = Dense(num_speakers_softmax, activation='softmax', name='speaker_pred')(y)
    gender_out = Dense(1, activation='sigmoid', name='gender_pred')(x)
    model = Model(inputs=base_model.input, outputs=[speaker_out, gender_out])
    model.compile(optimizer='adam',
                  loss=['sparse_categorical_crossentropy', 'binary_crossentropy'],
                  metrics={'speaker_pred': 'accuracy', 'gender_pred': 'binary_accuracy'})
    training_checkpoint = load_best_checkpoint(CHECKPOINTS_MTL_DIR)
    if training_checkpoint is not None:
        initial_epoch = int(training_checkpoint.split('/')[-1].split('.')[0].split('_')[-1])
        logger.info(f'Initial epoch is {initial_epoch}.')
        logger.info(f'Loading MTL checkpoint: {training_checkpoint}.')
        model.load_weights(training_checkpoint)  # latest one.
    else:
        initial_epoch = 0
    fit_model_mtl(model, kc.kx_train, kc.ky_train, kc.kg_train,
                  kc.kx_test, kc.ky_test, kc.kg_test, initial_epoch=initial_epoch)
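
# The two losses above are matched to the two outputs positionally
# ([speaker_out, gender_out]). Keras also accepts losses and weights keyed by
# output-layer name, which is less error-prone, and per-output loss weights if
# one task should dominate. The 0.5 weight below is illustrative, not from the
# source.
model.compile(optimizer='adam',
              loss={'speaker_pred': 'sparse_categorical_crossentropy',
                    'gender_pred': 'binary_crossentropy'},
              loss_weights={'speaker_pred': 1.0, 'gender_pred': 0.5},
              metrics={'speaker_pred': 'accuracy', 'gender_pred': 'binary_accuracy'})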
def main(args):
    np.random.seed(0)
    torch.manual_seed(0)
    start_time = time.time()
    utils.create_image_records(args.visualization_path)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # ------------------------- get data loaders -------------------------
    num_feature = 306
    trainlabels = ["mass", "force", "friction"] if args.baseline else args.train_labels
    passthrough_dict = utils.get_passthrough(trainlabels, parameter_length=25, vector_length=num_feature)
    train_loader, shape_test_loader, parameter_test_loader = \
        dataset.getloader(args, labels=trainlabels, inframe=[0, 1, 2, 3], outframe=[4])

    # ------------------------- initialize model and optimizer -------------------------
    select = not args.baseline
    model = net.PhysicsModel(num_feature=num_feature, passthrough=passthrough_dict, select=select)
    if args.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    start_epoch, best_epoch_error, model, optimizer = utils.load_best_checkpoint(
        args.resume, model, optimizer)

    # ------------------------- define criterion -------------------------
    criterion = evaluation.image

    for epoch in range(start_epoch, args.epochs):
        print(trainlabels)
        epoch_error = train(train_loader, model, criterion, optimizer, epoch, passthrough_dict, args=args)
        is_best = epoch_error < best_epoch_error
        best_epoch_error = min(epoch_error, best_epoch_error)
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'best_epoch_error': best_epoch_error,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint_path=args.resume,
            epoch=epoch)

    print('Time elapsed: {:.2f}s'.format(time.time() - start_time))
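
# A sketch of the checkpoint contract `main` assumes: `utils.load_best_checkpoint`
# restores the four values written by `utils.save_checkpoint` above, and falls
# back to a fresh start when nothing has been saved yet. Key names mirror the
# save call; the path handling and the real helper may differ.
import torch


def load_best_checkpoint_sketch(path, model, optimizer):
    try:
        state = torch.load(path)
    except FileNotFoundError:
        return 0, float('inf'), model, optimizer  # fresh start
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    return state['epoch'], state['best_epoch_error'], model, optimizer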
def main():
    # Passing any command-line argument enables hard-negative selection.
    select = len(sys.argv) > 1
    print('select', select)
    working_dir = '/media/philippe/8TB/deep-speaker'
    # By construction, these losses should be much higher than the normal losses.
    # We select batches this way.
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    print('Testing with the triplet losses.')
    dsm = DeepSpeakerModel(batch_input_shape, include_softmax=False)
    triplet_checkpoint = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
    pre_training_checkpoint = load_best_checkpoint(CHECKPOINTS_SOFTMAX_DIR)
    if triplet_checkpoint is not None:
        print(f'Loading triplet checkpoint: {triplet_checkpoint}.')
        dsm.m.load_weights(triplet_checkpoint)
    elif pre_training_checkpoint is not None:
        print(f'Loading pre-training checkpoint: {pre_training_checkpoint}.')
        # If `by_name` is True, weights are loaded into layers only if they share the
        # same name. This is useful for fine-tuning or transfer-learning models where
        # some of the layers have changed.
        dsm.m.load_weights(pre_training_checkpoint, by_name=True)
    dsm.m.compile(optimizer='adam', loss=deep_speaker_loss)
    kc = KerasFormatConverter(working_dir)
    if select:
        print('TripletBatcherSelectHardNegatives()')
        batcher = TripletBatcherSelectHardNegatives(kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test, dsm)
    else:
        print('TripletBatcher()')
        batcher = TripletBatcher(kc.kx_train, kc.ky_train, kc.kx_test, kc.ky_test)
    batch_size = BATCH_SIZE
    losses = []
    while True:
        _bx, _by = batcher.get_batch(batch_size, is_test=False)
        losses.append(dsm.m.evaluate(_bx, _by, verbose=0, batch_size=BATCH_SIZE))
        print(np.mean(losses))
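
# The loop above never terminates; a bounded variant for a quick sanity check
# (the 100-batch cap is illustrative, not from the source):
losses = []
for _ in range(100):
    _bx, _by = batcher.get_batch(batch_size, is_test=False)
    losses.append(dsm.m.evaluate(_bx, _by, verbose=0, batch_size=BATCH_SIZE))
print(np.mean(losses))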
def test(working_dir, checkpoint_file=None):
    batch_input_shape = [None, NUM_FRAMES, NUM_FBANKS, 1]
    dsm = DeepSpeakerModel(batch_input_shape)
    if checkpoint_file is None:
        checkpoint_file = load_best_checkpoint(CHECKPOINTS_TRIPLET_DIR)
    if checkpoint_file is not None:
        logger.info(f'Found checkpoint [{checkpoint_file}]. Loading weights...')
        dsm.m.load_weights(checkpoint_file, by_name=True)
    else:
        logger.info(f'Could not find any checkpoint in {CHECKPOINTS_TRIPLET_DIR}.')
        exit(1)
    fm, tpr, acc, eer = eval_model(working_dir, model=dsm)
    logger.info(f'f-measure = {fm:.3f}, true positive rate = {tpr:.3f}, '
                f'accuracy = {acc:.3f}, equal error rate = {eer:.3f}')
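
# Hypothetical invocations of `test`: evaluate the best triplet checkpoint, or
# point it at a specific file. The checkpoint filename below is illustrative;
# the working directory is the one used elsewhere in this repo.
test('/media/philippe/8TB/deep-speaker')  # best checkpoint in CHECKPOINTS_TRIPLET_DIR
test('/media/philippe/8TB/deep-speaker', 'checkpoints/ResCNN_checkpoint_42.h5')  # explicit file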
def main(args):
    np.random.seed(0)
    torch.manual_seed(0)
    start_time = time.time()
    utils.create_image_records(args.visualization_path)
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # ------------------------- get data loaders -------------------------
    num_feature, trainlabels = 306, args.train_labels
    passthrough_dict = utils.get_passthrough(trainlabels, parameter_length=25, vector_length=num_feature)
    train_loader, shape_test_loader, parameter_test_loader = \
        dataset.getloader(args, labels=trainlabels, inframe=[0, 1, 2, 3], outframe=[4])

    # ------------------------- initialize model and optimizer -------------------------
    model = net.PhysicsModel(num_feature=num_feature, passthrough=passthrough_dict)
    if args.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    start_epoch, best_epoch_error, model, optimizer = utils.load_best_checkpoint(
        args.resume, model, optimizer)

    # ------------------------- define criterion -------------------------
    criterion = evaluation.pixel

    shape_error = interpolate(shape_test_loader, model, criterion, start_epoch, optimizer, args=args)
    parameter_error = interpolate(parameter_test_loader, model, criterion, start_epoch, optimizer, args=args)
    print('Time elapsed: {:.2f}s'.format(time.time() - start_time))