def load_models(speakers):
    """Load the persisted GMM of every speaker in ``speakers``.

    Args:
        speakers: Iterable of speaker identifiers. Each one is passed to
            ``load_gmm`` and used as a key of the returned dict.

    Returns:
        dict mapping each speaker to the value returned by ``load_gmm``.

    Raises:
        SystemExit: With status 1 when
            ``file_listing(MODEL_PERSIST_PATH, 'gmm')`` is empty.
    """
    # Local import: the visible part of the file does not show a
    # module-level ``import sys``.
    import sys

    if len(file_listing(MODEL_PERSIST_PATH, 'gmm')) == 0:
        print('GMMs need to be trained first')
        # ``sys.exit`` rather than the ``site``-provided ``exit`` helper, which
        # is intended for interactive sessions and is missing under
        # ``python -S``.
        sys.exit(1)

    print('Loading saved GMM models from file')
    return {speaker: load_gmm(speaker) for speaker in speakers}
def train(model, args, experiment=None):
    """Compile ``model`` with the perceptual loss and fit it with validation.

    Args:
        model: Object exposing ``compile``, ``summary``, ``load_weights`` and
            ``fit_generator``.
        args: Namespace read for ``train_dataset``, ``test_dataset``,
            ``batch_size``, ``img_w``, ``img_h``, ``weights``,
            ``model_save_path``, ``validation_path`` and ``epochs``.
        experiment: Optional experiment handle. When it is not ``None`` a
            ``LogImages`` callback is added for the files listed under
            ``args.validation_path``.
    """
    img_size = (args.img_w, args.img_h)

    train_seq = TrainDatasetSequence(
        args.train_dataset, batch_size=args.batch_size, img_size=img_size)
    test_seq = TestDatasetSequence(
        args.test_dataset, batch_size=args.batch_size, img_size=img_size)

    optimizer = Adam(lr=0.0002)
    # The loss is built from a height-first shape with a trailing 3, unlike
    # the width-first ``img_size`` handed to the sequences.
    loss = perceptual_loss(input_shape=(args.img_h, args.img_w, 3))
    model.compile(optimizer=optimizer, loss=loss)
    model.summary()

    # Optionally start from previously saved weights.
    if args.weights:
        model.load_weights(args.weights)

    checkpoint_pattern = (
        args.model_save_path + 'fusion_unet_{epoch:02d}_{val_loss:.3f}.h5')
    callbacks = [
        ModelCheckpoint(checkpoint_pattern, save_weights_only=True, verbose=1),
        TerminateOnNaN(),
    ]
    if experiment is not None:
        image_logger = LogImages(
            experiment,
            paths=file_listing(args.validation_path),
            img_size=img_size)
        callbacks.append(image_logger)

    model.fit_generator(
        train_seq,
        epochs=args.epochs,
        validation_data=test_seq,
        use_multiprocessing=True,
        workers=8,
        callbacks=callbacks)
def train(model, args, experiment=None):
    """Compile ``model`` with an MSE loss and fit it on the training sequence.

    No validation data is passed to ``fit_generator`` here, and the checkpoint
    file name only carries the epoch number.

    Args:
        model: Object exposing ``compile``, ``summary``, ``load_weights`` and
            ``fit_generator``.
        args: Namespace read for ``train_dataset``, ``batch_size``, ``img_w``,
            ``img_h``, ``weights``, ``model_save_path``, ``validation_path``
            and ``epochs``.
        experiment: Optional experiment handle. When it is not ``None`` a
            ``LogImages`` callback (``log_iters=500``, ``use_lab=True``) is
            added.
    """
    img_size = (args.img_w, args.img_h)

    train_seq = TrainDatasetSequence(
        args.train_dataset,
        batch_size=args.batch_size,
        img_size=img_size,
        use_lab=True)

    model.compile(optimizer=Adam(lr=0.001), loss='mse')
    model.summary()

    # Optionally start from previously saved weights.
    if args.weights:
        model.load_weights(args.weights)

    checkpoint_pattern = (
        args.model_save_path + 'encoder_decoder_mse_{epoch:02d}.h5')
    callbacks = [
        ModelCheckpoint(checkpoint_pattern, save_weights_only=True, verbose=1),
        TerminateOnNaN(),
    ]
    if experiment is not None:
        image_logger = LogImages(
            experiment,
            paths=file_listing(args.validation_path),
            img_size=img_size,
            log_iters=500,
            use_lab=True)
        callbacks.append(image_logger)

    model.fit_generator(
        train_seq,
        epochs=args.epochs,
        use_multiprocessing=True,
        workers=8,
        callbacks=callbacks)
def read_pair(line):
    """Parse one pairs-file line into ``(left_image, right_image, label)``.

    The layout is chosen by the number of whitespace-separated fields:

    * 3 fields, ``<name> <img_l_id> <img_r_id>``: both images are taken from
      the ``jpg`` listing of ``name`` and the label is ``True``.
    * 4 fields, ``<name_l> <img_l_id> <name_r> <img_r_id>``: each image is
      taken from the listing of its own name and the label is
      ``name_l == name_r``.

    Every image id is decremented by one before it is used as an index into
    ``file_listing(LFW_PATH + '/' + name, 'jpg')``.

    Returns:
        The 3-tuple described above, or ``None`` for any other field count.
    """
    fields = line.strip().split()

    if len(fields) == 3:
        person, left_id, right_id = fields
        left_idx = int(left_id) - 1
        right_idx = int(right_id) - 1
        listing = file_listing(LFW_PATH + '/' + person, 'jpg')
        return (listing[left_idx], listing[right_idx], True)

    if len(fields) == 4:
        left_person, left_id, right_person, right_id = fields
        left_idx = int(left_id) - 1
        right_idx = int(right_id) - 1
        left_img = file_listing(LFW_PATH + '/' + left_person, 'jpg')[left_idx]
        right_img = file_listing(LFW_PATH + '/' + right_person, 'jpg')[right_idx]
        return (left_img, right_img, left_person == right_person)

    # Any other layout (for example a header line) yields no pair.
    return None
def __init__(self, base_test_path, batch_size=32, input_size=(32, 32), scale=3):
    """Store the batching options and list the test files.

    Args:
        base_test_path: Location handed to ``file_listing`` together with
            ``extension='JPEG'``; the result is kept as ``self.paths``.
        batch_size: Stored as ``self.batch_size``.
        input_size: Stored as ``self.input_size``.
        scale: Stored as ``self.scale``.
    """
    self.paths = file_listing(base_test_path, extension='JPEG')
    self.batch_size = batch_size
    self.input_size = input_size
    self.scale = scale
def read_dataset_dir(base_dir):
    """Read every wav file below ``base_dir`` into one array per speaker.

    Each directory returned by ``dir_listing(base_dir)`` is treated as one
    speaker, keyed by ``last_component`` of its path. The arrays returned by
    ``read_wav`` for that speaker's files are stacked row-wise in listing
    order.

    The result holds only rows that came from ``read_wav``: no uninitialised
    placeholder row is prepended. A speaker without wav files maps to an
    array with zero rows.

    Args:
        base_dir: Directory whose sub-directories are listed.

    Returns:
        dict mapping speaker name to the stacked ``numpy`` array.
    """
    dirs = dir_listing(base_dir)

    # Zero-row array with the trailing shape implied by ``OUTPUT_DIM``. It
    # contributes no data to ``np.vstack`` but keeps the float dtype promotion
    # and gives speakers without files a well-defined empty result.
    no_rows = np.atleast_2d(np.empty(OUTPUT_DIM))[:0]

    # Collect per-speaker chunks first so each speaker is stacked exactly once
    # instead of re-copying the growing array for every file.
    chunks = {last_component(dir_path): [] for dir_path in dirs}
    for dir_path in dirs:
        speaker = last_component(dir_path)
        for wav_path in file_listing(dir_path, 'wav'):
            chunks[speaker].append(read_wav(wav_path))

    return {
        speaker: np.vstack([no_rows, *arrays])
        for speaker, arrays in chunks.items()
    }
def train(args, experiment=None):
    """Run the GAN training loop, alternating discriminator and generator steps.

    For every batch the discriminator is trained first, then the generator.
    Weights of the GAN and of the generator are saved every ``LOG_PERIOD``
    batches (including batch 0 of each epoch).

    Args:
        args: Namespace read for ``train_dataset``, ``batch_size``, ``img_w``,
            ``img_h``, ``generator_weights``, ``discriminator_weights``,
            ``gan_weights``, ``validation_path``, ``epochs`` and
            ``model_save_path``.
        experiment: Optional experiment handle. When it is ``None`` the loop
            still trains and saves weights, but metric logging and the
            ``LogImages`` callback are skipped instead of failing on
            ``None.log_metrics``.
    """
    LOG_PERIOD = 1000

    # Dataset. The sequence yields half of ``args.batch_size`` while the
    # train_* helpers receive the full value.
    # NOTE(review): presumably each step combines real and generated images;
    # confirm against train_discriminator / train_generator.
    train_seq = TrainDatasetSequence(args.train_dataset,
                                     batch_size=args.batch_size // 2,
                                     img_size=(args.img_w, args.img_h))

    # Build GAN
    gan, generator, discriminator = build_gan(
        (args.img_w, args.img_h),
        args.generator_weights,
        args.discriminator_weights,
        args.gan_weights)

    # Log images callback: only created when there is an experiment to log to.
    log_images_callback = None
    if experiment is not None:
        log_images_callback = LogImages(
            experiment,
            paths=file_listing(args.validation_path),
            model=generator,
            img_size=(args.img_w, args.img_h),
            log_iters=LOG_PERIOD)

    # Create parallel queue to load images asynchronously
    batch_queue = create_parallel_queue(train_seq)

    # Train GAN
    for epoch in range(args.epochs):
        print(f'Epoch {epoch + 1}')
        for batch_idx in tqdm(range(len(train_seq))):
            gray_imgs, rgb_imgs = next(batch_queue)

            disc_loss = train_discriminator(gan, generator, discriminator,
                                            gray_imgs, rgb_imgs,
                                            args.batch_size)
            _, percep_loss, gen_loss = train_generator(
                gan, discriminator, gray_imgs, rgb_imgs, args.batch_size)

            if experiment is not None:
                # CometML logs
                metrics = {
                    'discriminator_loss': disc_loss,
                    'generator_loss': gen_loss,
                    'perceptual_loss': percep_loss,
                }
                experiment.log_metrics(metrics, step=batch_idx, epoch=epoch)
                log_images_callback.on_batch_end(log_images_callback.iter,
                                                 None)

            # Save model
            if batch_idx % LOG_PERIOD == 0:
                path_suffix = (f'{args.img_w}x{args.img_h}'
                               f'_epoch-{epoch + 1}_{batch_idx // 1000}K.h5')
                gan.save_weights(f'{args.model_save_path}/gan_{path_suffix}')
                generator.save_weights(
                    f'{args.model_save_path}/generator_{path_suffix}')

        if log_images_callback is not None:
            log_images_callback.on_epoch_end(epoch, None)
def train(model, args, experiment):
    """Compile the super-resolution ``model`` and fit it with validation.

    Args:
        model: Object exposing ``compile``, ``summary``, ``load_weights`` and
            ``fit_generator``.
        args: Namespace read for ``train_dataset``, ``test_dataset``,
            ``batch_size``, ``input_w``, ``input_h``, ``scale``, ``weights``,
            ``model_save_path``, ``validation_path`` and ``epochs``.
        experiment: Experiment handle passed to the ``LogImages`` callback,
            which is always installed here.
    """
    input_size = (args.input_w, args.input_h)
    # Height-first shape of the input scaled by ``args.scale``, with a
    # trailing 3; used to build the perceptual loss.
    output_shape = (args.input_h * args.scale, args.input_w * args.scale, 3)

    train_seq = TrainDatasetSequence(
        args.train_dataset,
        batch_size=args.batch_size,
        input_size=input_size,
        scale=args.scale)
    test_seq = TestDatasetSequence(
        args.test_dataset,
        batch_size=args.batch_size,
        input_size=input_size,
        scale=args.scale)

    optimizer = Adam(lr=3e-4)
    loss = perceptual_loss(output_shape)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['mse', psnr_metric()])
    model.summary()

    # Optionally start from previously saved weights.
    if args.weights:
        model.load_weights(args.weights)

    checkpoint_pattern = (
        args.model_save_path + 'sr_{epoch:02d}_{val_loss:.3f}.h5')
    callbacks = [
        ModelCheckpoint(checkpoint_pattern, save_weights_only=True, verbose=1),
        TerminateOnNaN(),
        LogImages(experiment,
                  paths=file_listing(args.validation_path),
                  input_size=input_size,
                  scale=args.scale,
                  log_iters=500),
    ]

    model.fit_generator(
        train_seq,
        epochs=args.epochs,
        validation_data=test_seq,
        use_multiprocessing=True,
        workers=8,
        callbacks=callbacks)
def _get_image_paths(self, base_path):
    """Return the concatenated ``jpg`` listings of the directories under ``base_path``.

    Directories come from ``dir_listing(base_path)`` and are visited in the
    order that call returns them; each one is listed with
    ``file_listing(dirpath, extension='jpg')``.
    """
    return [
        image_path
        for dirpath in dir_listing(base_path)
        for image_path in file_listing(dirpath, extension='jpg')
    ]
if __name__ == '__main__':
    # Train one GMM per distinct speaker, persist the models, then score them
    # on the wav files under ../data/dev/, where the directory name is the
    # expected speaker.
    #
    # NOTE(review): ``speaker_recordings`` is not used below but is kept as a
    # module-level name in case ``train_gmms`` reads it as a global; confirm
    # before removing.
    speaker_recordings, speakers = load_local_dataset()
    gmm_models = {speaker: init_gmm() for speaker in set(speakers)}
    train_gmms(gmm_models)
    save_gmms(gmm_models)

    # Calculate precision
    total = 0
    correct = 0
    wrong_files = {}  # file -> (predicted speaker, expected speaker)
    for dir_path in dir_listing(relative_path('../data/dev/')):
        expected = last_component(dir_path)
        for file in file_listing(dir_path, 'wav'):
            print('Predicting file %s (%s/%s)' % (file, correct, total + 1))
            speaker = predict(gmm_models, file)
            if speaker == expected:
                correct += 1
            else:
                wrong_files[file] = (speaker, expected)
            total += 1

    print('Incorrectly classified')
    for file, val in wrong_files.items():
        print('%s is %s but was classfied as %s' % (file, val[1], val[0]))

    print('Got %s correct out of %s' % (correct, total))
    if total:
        print('-> %s percent accuracy' % ((correct / total) * 100))
    else:
        # Without any evaluated file the ratio is undefined; report that
        # instead of raising ZeroDivisionError.
        print('-> no wav files were evaluated, accuracy is undefined')
import numpy as np import face_recognition from utils import file_listing base_path = '../data/ids/' img_paths = file_listing(base_path) def get_face_emb(filename): img_np = face_recognition.load_image_file(base_path + filename) embs = face_recognition.face_encodings(img_np) return embs[0] if len(embs) > 0 else None anchor_embs = list(map(get_face_emb, img_paths)) def get_face_identity(img_np): ''' Get face identity agains anchor embeddings. Args: img_np (numpy): Input image (w, h, c) Returns: (string) name of anchor image filename without extension ''' target_embs = face_recognition.face_encodings(img_np) if len(target_embs) > 0: hits = face_recognition.compare_faces(anchor_embs,
import os

from gmm_train import load_models, get_gmm_path, predict
from dataset import get_speakers
from utils import file_listing, dir_listing, relative_path, get_file_name

# Directory whose wav files are classified when this module runs as a script.
EVAL_DIR_PATH = relative_path('../data/eval/')

if __name__ == '__main__':
    # Load the saved model of every known speaker, then print one
    # "<file name> -> <predicted speaker>" line per wav file.
    gmm_models = load_models(get_speakers())
    for wav_path in file_listing(EVAL_DIR_PATH, 'wav'):
        predicted = predict(gmm_models, wav_path)
        print('%s -> %s' % (get_file_name(wav_path), predicted))
def __init__(self, base_test_path, batch_size=128, img_size=(256, 256)):
    """Store the batching options and list the test files.

    Args:
        base_test_path: Location handed to ``file_listing`` together with
            ``extension='JPEG'``; the result is kept as ``self.paths``.
        batch_size: Stored as ``self.batch_size``.
        img_size: Stored as ``self.img_size``.
    """
    self.paths = file_listing(base_test_path, extension='JPEG')
    self.img_size = img_size
    self.batch_size = batch_size