def export_model(params, model_dir):
    """Export a trained TF model in SavedModel format for inference.

    Args:
        params (dict): dictionary of model training parameters
        model_dir (str): path to the pre-trained model directory
    """
    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
    logger = logging.getLogger(__name__)

    np.random.seed(0)
    tf.set_random_seed(0)
    tf.logging.set_verbosity(tf.logging.INFO)

    train_config = tf.estimator.RunConfig(
        save_summary_steps=10,
        save_checkpoints_steps=1000,
        keep_checkpoint_max=20,
        log_step_count_steps=10,
    )
    estimator_obj = VadEstimator(params)
    estimator = tf.estimator.Estimator(
        model_fn=estimator_obj.model_fn,
        model_dir=model_dir,
        config=train_config,
        params=params,
    )

    # Serving input: one example of model input features.
    feature_spec = {
        "features_input": tf.placeholder(
            dtype=tf.float32,
            shape=[1, FEAT_SIZE[1], FEAT_SIZE[0]],
        )
    }

    logger.info("Exporting TensorFlow trained model ...")
    raw_serving_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_spec,
        default_batch_size=1,
    )
    estimator.export_savedmodel(model_dir, raw_serving_fn, strip_default_attrs=True)
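# A minimal, hypothetical invocation of export_model. The path is a
# placeholder and the hyperparameter values simply mirror the CLI defaults
# used by the main() below.
params = {
    "model": "resnet1d",
    "n_classes": 1,
    "n_cnn_filters": [32, 64, 128],
    "n_cnn_kernels": [8, 5, 3],
    "n_fc_units": [2048, 2048],
}
export_model(params, "/path/to/trained/model/")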
def main():
    parser = ArgumentParser(
        description='export trained TensorFlow model for inference')
    parser.add_argument('--model-dir', type=str,
                        help='pretrained model directory')
    parser.add_argument('--model', type=str, default='resnet1d',
                        help='model name')
    parser.add_argument('--n-filters', type=str, default='32-64-128')
    parser.add_argument('--n-kernels', type=str, default='8-5-3')
    parser.add_argument('--n-fc-units', type=str, default='2048-2048')
    parser.add_argument('--n-classes', '-n', type=int, default=1,
                        help='number of classes')
    args = parser.parse_args()

    assert args.model in ['resnet1d'], 'Wrong model name'
    assert len(
        args.n_filters.split('-')) == 3, '3 values required for --n-filters'
    assert len(
        args.n_kernels.split('-')) == 3, '3 values required for --n-kernels'
    assert len(
        args.n_fc_units.split('-')) == 2, '2 values required for --n-fc-units'

    basicConfig(level=DEBUG, stream=stdout)
    logger = getLogger(__name__)

    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    save_dir = args.model_dir

    params = {
        'model': args.model,
        'n_classes': args.n_classes,
        'n_cnn_filters': [int(x) for x in args.n_filters.split('-')],
        'n_cnn_kernels': [int(x) for x in args.n_kernels.split('-')],
        'n_fc_units': [int(x) for x in args.n_fc_units.split('-')]
    }

    train_config = tf.estimator.RunConfig(save_summary_steps=10,
                                          save_checkpoints_steps=1000,
                                          keep_checkpoint_max=20,
                                          log_step_count_steps=10)
    estimator_obj = VadEstimator(params)
    estimator = tf.estimator.Estimator(model_fn=estimator_obj.model_fn,
                                       model_dir=save_dir,
                                       config=train_config,
                                       params=params)

    feature_spec = {
        'features_input':
        tf.compat.v1.placeholder(dtype=tf.float32,
                                 shape=[1, FEAT_SIZE[1], FEAT_SIZE[0]])
    }

    logger.info('Exporting TensorFlow trained model ...')
    raw_serving_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_spec, default_batch_size=1)
    estimator.export_saved_model(save_dir, raw_serving_fn)
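# To sanity-check the export, the SavedModel can be reloaded and queried.
# This is a sketch, not part of the original script: the timestamped export
# path is a placeholder, and FEAT_SIZE is assumed to match the constant
# used at export time.
loaded = tf.saved_model.load('/path/to/model/1594060000/')
infer = loaded.signatures['serving_default']
dummy = np.zeros((1, FEAT_SIZE[1], FEAT_SIZE[0]), dtype=np.float32)
print(infer(features_input=tf.constant(dummy)))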
def main():
    parser = ArgumentParser(description='train CNN for VAD')
    parser.add_argument('--data-dir', '-d', type=str,
                        default='../LibriSpeech/tfrecords/',
                        help='tf records data directory')
    parser.add_argument('--model-dir', type=str, default='',
                        help='pretrained model directory')
    parser.add_argument('--ckpt', type=str, default='',
                        help='pretrained checkpoint directory')
    parser.add_argument('--mode', '-m', type=str, default='train',
                        help='train, eval or predict')
    parser.add_argument('--model', type=str, default='resnet1d',
                        help='model name')
    parser.add_argument('--input-size', type=int, default=1024,
                        help='signal input size')
    parser.add_argument('--batch-size', '-bs', type=int, default=32,
                        help='batch size')
    parser.add_argument('--epochs', '-e', type=int, default=20,
                        help='train epochs')
    parser.add_argument('--n-filters', type=str, default='32-64-128')
    parser.add_argument('--n-kernels', type=str, default='8-5-3')
    parser.add_argument('--n-fc-units', type=str, default='2048-2048')
    parser.add_argument('--n-classes', '-n', type=int, default=1,
                        help='number of classes in output')
    parser.add_argument('--learning-rate', '-lr', type=float, default=0.00001,
                        help='initial learning rate')
    parser.add_argument('--fake-input', action='store_true', default=False,
                        help='debug with 1 batch training')
    parser.add_argument('--subsample', action='store_true', default=False,
                        help='subsample signal')
    args = parser.parse_args()

    assert args.model in ['resnet1d'], 'Wrong model name'
    assert len(
        args.n_filters.split('-')) == 3, '3 values required for --n-filters'
    assert len(
        args.n_kernels.split('-')) == 3, '3 values required for --n-kernels'
    assert len(
        args.n_fc_units.split('-')) == 2, '2 values required for --n-fc-units'

    tfrecords_train = glob('{}train/*.tfrecord'.format(args.data_dir))
    tfrecords_val = glob('{}val/*.tfrecord'.format(args.data_dir))
    tfrecords_test = glob('{}test/*.tfrecord'.format(args.data_dir))

    basicConfig(level=DEBUG, stream=stdout)
    logger = getLogger(__name__)

    np.random.seed(0)
    tf.compat.v1.set_random_seed(0)
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    if not args.model_dir:
        save_dir = '{}models/{}/{}/'.format(args.data_dir, args.model,
                                            datetime.now().isoformat())
        if not exists(save_dir):
            makedirs(save_dir)
    else:
        save_dir = args.model_dir

    params = {
        'model': args.model,
        'batch_size': args.batch_size,
        'epochs': args.epochs,
        'n_cnn_filters': [int(x) for x in args.n_filters.split('-')],
        'n_cnn_kernels': [int(x) for x in args.n_kernels.split('-')],
        'n_fc_units': [int(x) for x in args.n_fc_units.split('-')],
        'n_classes': args.n_classes,
        'lr': args.learning_rate,
    }

    train_config = tf.estimator.RunConfig(save_summary_steps=10,
                                          save_checkpoints_steps=500,
                                          keep_checkpoint_max=2,
                                          log_step_count_steps=10)

    ws = None
    if args.ckpt:
        ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=args.ckpt,
                                            vars_to_warm_start='.*')

    # Create TensorFlow estimator object
    estimator_obj = VadEstimator(params)
    estimator = tf.estimator.Estimator(model_fn=estimator_obj.model_fn,
                                       model_dir=save_dir,
                                       config=train_config,
                                       params=params,
                                       warm_start_from=ws)

    mode_keys = {
        'train': tf.estimator.ModeKeys.TRAIN,
        'eval': tf.estimator.ModeKeys.EVAL,
        'predict': tf.estimator.ModeKeys.PREDICT
    }
    mode = mode_keys[args.mode]

    # Training & Evaluation on Train / Val set.
    # data_input_fn (defined elsewhere in the repo) builds the estimator
    # input pipeline; a sketch of it follows this function.
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_input_fn = data_input_fn(tfrecords_train,
                                       batch_size=params['batch_size'],
                                       epochs=1,
                                       input_size=args.input_size,
                                       n_classes=params['n_classes'],
                                       subsample=args.subsample,
                                       shuffle=True,
                                       fake_input=args.fake_input)
        eval_input_fn = data_input_fn(tfrecords_val,
                                      batch_size=params['batch_size'],
                                      epochs=1,
                                      input_size=args.input_size,
                                      n_classes=params['n_classes'],
                                      subsample=args.subsample,
                                      shuffle=False,
                                      fake_input=args.fake_input)

        for epoch_num in range(params['epochs']):
            logger.info("Training for epoch {} ...".format(epoch_num))
            estimator.train(input_fn=train_input_fn)
            logger.info("Evaluation for epoch {} ...".format(epoch_num))
            estimator.evaluate(input_fn=eval_input_fn)

    # Evaluation on Test set
    elif mode == tf.estimator.ModeKeys.EVAL:
        test_input_fn = data_input_fn(tfrecords_test,
                                      batch_size=params['batch_size'],
                                      epochs=1,
                                      input_size=args.input_size,
                                      n_classes=params['n_classes'],
                                      subsample=args.subsample,
                                      shuffle=False,
                                      fake_input=args.fake_input)

        logger.info("Evaluation of test set ...")
        estimator.evaluate(input_fn=test_input_fn)

    # Prediction visualization on Test set
    elif mode == tf.estimator.ModeKeys.PREDICT:
        test_input_fn = data_input_fn(tfrecords_test,
                                      batch_size=params['batch_size'],
                                      epochs=1,
                                      input_size=args.input_size,
                                      n_classes=params['n_classes'],
                                      subsample=args.subsample,
                                      shuffle=False,
                                      fake_input=args.fake_input)

        classes = ['Noise', 'Speech']
        predictions = estimator.predict(input_fn=test_input_fn)
        for pred in predictions:
            signal_input = pred['signal_input']
            pred = pred['speech']

            # Plot signal (uncomment 'signal_input': features['signal_input']
            # in estimator.py)
            sns.set()
            sns.lineplot(x=[i for i in range(len(signal_input[:, 0]))],
                         y=signal_input[:, 0])
            plt.title('Signal = {}'.format(classes[int(np.round(pred))]))
            plt.xlabel('Time (num. points)')
            plt.ylabel('Amplitude')
            plt.show()
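# Sketch of what data_input_fn might look like. This is NOT the repo's
# implementation: only the signature is taken from the call sites above;
# the TFRecord feature keys ('signal' and 'label') are assumptions, and
# n_classes / subsample handling is omitted for brevity.
def data_input_fn(tfrecords, batch_size, epochs, input_size, n_classes,
                  subsample=False, shuffle=False, fake_input=False):
    def _parse(example_proto):
        # Hypothetical feature layout: a fixed-length float signal plus an
        # integer speech/noise label.
        spec = {
            'signal': tf.io.FixedLenFeature([input_size], tf.float32),
            'label': tf.io.FixedLenFeature([], tf.int64),
        }
        parsed = tf.io.parse_single_example(example_proto, spec)
        return {'signal_input': parsed['signal']}, parsed['label']

    def _input_fn():
        dataset = tf.data.TFRecordDataset(tfrecords)
        dataset = dataset.map(_parse)
        if shuffle:
            dataset = dataset.shuffle(buffer_size=1000)
        if fake_input:
            # Keep a single batch of examples for debugging.
            dataset = dataset.take(batch_size)
        return dataset.batch(batch_size).repeat(epochs)

    return _input_fn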
def main():
    parser = argparse.ArgumentParser(
        description="export trained TensorFlow model for inference")
    parser.add_argument("--model-dir", type=str, default="",
                        help="pretrained model directory")
    parser.add_argument("--ckpt", type=str, default="",
                        help="pretrained checkpoint directory")
    parser.add_argument("--model", type=str, default="resnet1d",
                        help="model name")
    parser.add_argument("--n-filters", type=str, default="32-64-128")
    parser.add_argument("--n-kernels", type=str, default="8-5-3")
    parser.add_argument("--n-fc-units", type=str, default="2048-2048")
    parser.add_argument("--n-classes", "-n", type=int, default=1,
                        help="number of classes")
    args = parser.parse_args()

    assert args.model in ["resnet1d"], "Wrong model name"
    assert len(
        args.n_filters.split("-")) == 3, "3 values required for --n-filters"
    assert len(
        args.n_kernels.split("-")) == 3, "3 values required for --n-kernels"
    assert len(
        args.n_fc_units.split("-")) == 2, "2 values required for --n-fc-units"

    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
    logger = logging.getLogger(__name__)

    np.random.seed(0)
    tf.set_random_seed(0)
    tf.logging.set_verbosity(tf.logging.INFO)

    save_dir = args.model_dir

    params = {
        "model": args.model,
        "n_classes": args.n_classes,
        "n_cnn_filters": [int(x) for x in args.n_filters.split("-")],
        "n_cnn_kernels": [int(x) for x in args.n_kernels.split("-")],
        "n_fc_units": [int(x) for x in args.n_fc_units.split("-")],
    }

    train_config = tf.estimator.RunConfig(
        save_summary_steps=10,
        save_checkpoints_steps=1000,
        keep_checkpoint_max=20,
        log_step_count_steps=10,
    )
    estimator_obj = VadEstimator(params)
    estimator = tf.estimator.Estimator(
        model_fn=estimator_obj.model_fn,
        model_dir=save_dir,
        config=train_config,
        params=params,
    )

    feature_spec = {
        "features_input":
        tf.placeholder(dtype=tf.float32,
                       shape=[1, FEAT_SIZE[1], FEAT_SIZE[0]])
    }

    logger.info("Exporting TensorFlow trained model ...")
    raw_serving_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_spec, default_batch_size=1)
    estimator.export_savedmodel(save_dir, raw_serving_fn,
                                strip_default_attrs=True)
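# TF1-style reload check for the SavedModel exported above, as a sketch:
# tf.contrib.predictor wraps the export in a plain callable. The timestamped
# export path is a placeholder, and FEAT_SIZE is assumed to match the
# constant used at export time.
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model("/path/to/model/1594060000/")
dummy = np.zeros((1, FEAT_SIZE[1], FEAT_SIZE[0]), dtype=np.float32)
print(predict_fn({"features_input": dummy}))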
def train(
    params,
    data_dir,
    model_dir=None,
    ckpt=None,
    mode="train",
    fake_input=False,
):
    """Train a CNN for Voice Activity Detection on a TFRecords dataset.

    Args:
        params (dict): dictionary of model training parameters
        data_dir (str): path to TFRecords dataset directory
        model_dir (str, optional): path to trained model directory. Defaults to None
        ckpt (str, optional): path to pre-trained checkpoint directory. Defaults to None
        mode (str, optional): TF estimator mode, one of ["train", "eval", "predict"].
            Defaults to "train"
        fake_input (bool, optional): debugging option to train on 1 batch. Defaults to False
    """
    tfrecords_train = glob.glob(f"{data_dir}train/*.tfrecord")
    tfrecords_val = glob.glob(f"{data_dir}val/*.tfrecord")
    tfrecords_test = glob.glob(f"{data_dir}test/*.tfrecord")

    logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
    logger = logging.getLogger(__name__)

    np.random.seed(0)
    tf.set_random_seed(0)
    tf.logging.set_verbosity(tf.logging.INFO)

    model = params["model"]
    input_size = params["input_size"]
    subsample = params["subsample"]

    if not model_dir:
        save_dir = f"{data_dir}models/{model}/{datetime.now().isoformat()}/"
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
    else:
        save_dir = model_dir

    train_config = tf.estimator.RunConfig(
        save_summary_steps=10,
        save_checkpoints_steps=500,
        keep_checkpoint_max=20,
        log_step_count_steps=10,
    )

    ws = None
    if ckpt:
        ws = tf.estimator.WarmStartSettings(
            ckpt_to_initialize_from=ckpt,
            vars_to_warm_start=".*",
        )

    # Create TensorFlow estimator object
    estimator_obj = VadEstimator(params)
    estimator = tf.estimator.Estimator(
        model_fn=estimator_obj.model_fn,
        model_dir=save_dir,
        config=train_config,
        params=params,
        warm_start_from=ws,
    )

    mode_keys = {
        "train": tf.estimator.ModeKeys.TRAIN,
        "eval": tf.estimator.ModeKeys.EVAL,
        "predict": tf.estimator.ModeKeys.PREDICT,
    }
    mode = mode_keys[mode]

    # Training & Evaluation on Train / Val set
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_input_fn = data_input_fn(
            tfrecords_train,
            batch_size=params["batch_size"],
            epochs=1,
            input_size=input_size,
            n_classes=params["n_classes"],
            subsample=subsample,
            shuffle=True,
            fake_input=fake_input,
        )
        eval_input_fn = data_input_fn(
            tfrecords_val,
            batch_size=params["batch_size"],
            epochs=1,
            input_size=input_size,
            n_classes=params["n_classes"],
            subsample=subsample,
            shuffle=False,
            fake_input=fake_input,
        )

        for epoch_num in range(params["epochs"]):
            logger.info(f"Training for epoch {epoch_num} ...")
            estimator.train(input_fn=train_input_fn)
            logger.info(f"Evaluation for epoch {epoch_num} ...")
            estimator.evaluate(input_fn=eval_input_fn)

    # Evaluation on Test set
    elif mode == tf.estimator.ModeKeys.EVAL:
        test_input_fn = data_input_fn(
            tfrecords_test,
            batch_size=params["batch_size"],
            epochs=1,
            input_size=input_size,
            n_classes=params["n_classes"],
            subsample=subsample,
            shuffle=False,
            fake_input=fake_input,
        )

        logger.info("Evaluation of test set ...")
        estimator.evaluate(input_fn=test_input_fn)

    # Prediction visualization on Test set
    elif mode == tf.estimator.ModeKeys.PREDICT:
        test_input_fn = data_input_fn(
            tfrecords_test,
            batch_size=params["batch_size"],
            epochs=1,
            input_size=input_size,
            n_classes=params["n_classes"],
            subsample=subsample,
            shuffle=False,
            fake_input=fake_input,
        )

        classes = ["Noise", "Speech"]
        predictions = estimator.predict(input_fn=test_input_fn)
        for pred in predictions:
            signal_input = pred["signal_input"]
            pred = pred["speech"]

            # Plot signal (uncomment 'signal_input': features['signal_input']
            # in estimator.py)
            sns.set()
            sns.lineplot(
                x=[i for i in range(len(signal_input[:, 0]))],
                y=signal_input[:, 0],
            )
            plt.title(f"Signal = {classes[int(np.round(pred))]}")
            plt.xlabel("Time (num. points)")
            plt.ylabel("Amplitude")
            plt.show()
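# A minimal, hypothetical call to train(); the path is a placeholder and the
# hyperparameters mirror the CLI defaults of the main() below. Note that
# train() also reads "input_size" and "subsample" from params, which the
# CLI entry point must therefore include.
params = {
    "model": "resnet1d",
    "input_size": 1024,
    "batch_size": 32,
    "epochs": 20,
    "n_cnn_filters": [32, 64, 128],
    "n_cnn_kernels": [8, 5, 3],
    "n_fc_units": [2048, 2048],
    "n_classes": 1,
    "lr": 1e-5,
    "subsample": False,
}
train(params, "/path/to/LibriSpeech/tfrecords/")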
def main(): parser = argparse.ArgumentParser(description="train CNN for VAD") parser.add_argument( "--data-dir", "-d", type=str, default="/home/filippo/datasets/LibriSpeech/tfrecords/", help="tf records data directory", ) parser.add_argument("--model-dir", type=str, default="", help="pretrained model directory") parser.add_argument("--ckpt", type=str, default="", help="pretrained checkpoint directory") parser.add_argument("--mode", "-m", type=str, default="train", help="train, eval or predict") parser.add_argument("--model", type=str, default="resnet1d", help="model name") parser.add_argument("--input-size", type=int, default=1024, help="signal input size") parser.add_argument("--batch-size", "-bs", type=int, default=32, help="batch size") parser.add_argument("--epochs", "-e", type=int, default=20, help="train epochs") parser.add_argument("--n-filters", type=str, default="32-64-128") parser.add_argument("--n-kernels", type=str, default="8-5-3") parser.add_argument("--n-fc-units", type=str, default="2048-2048") parser.add_argument( "--n-classes", "-n", type=int, default=1, help="number of classes in output", ) parser.add_argument( "--learning-rate", "-lr", type=float, default=0.00001, help="initial learning rate", ) parser.add_argument( "--fake-input", action="store_true", default=False, help="debug with 1 batch training", ) parser.add_argument( "--subsample", action="store_true", default=False, help="subsample signal", ) args = parser.parse_args() assert args.model in ["resnet1d"], "Wrong model name" assert len( args.n_filters.split("-")) == 3, "3 values required for --n-filters" assert len( args.n_kernels.split("-")) == 3, "3 values required for --n-kernels" assert len( args.n_fc_units.split("-")) == 2, "2 values required --n-fc-units" tfrecords_train = glob.glob(f"{args.data_dir}train/*.tfrecord") tfrecords_val = glob.glob(f"{args.data_dir}val/*.tfrecord") tfrecords_test = glob.glob(f"{args.data_dir}test/*.tfrecord") logging.basicConfig(level=logging.DEBUG, stream=sys.stdout) logger = logging.getLogger(__name__) np.random.seed(0) tf.set_random_seed(0) tf.logging.set_verbosity(tf.logging.INFO) if not args.model_dir: save_dir = f"{args.data_dir}models/{args.model}/{datetime.now().isoformat()}/" if not os.path.exists(save_dir): os.makedirs(save_dir) else: save_dir = args.model_dir params = { "model": args.model, "batch_size": args.batch_size, "epochs": args.epochs, "n_cnn_filters": [int(x) for x in args.n_filters.split("-")], "n_cnn_kernels": [int(x) for x in args.n_kernels.split("-")], "n_fc_units": [int(x) for x in args.n_fc_units.split("-")], "n_classes": args.n_classes, "lr": args.learning_rate, } train_config = tf.estimator.RunConfig( save_summary_steps=10, save_checkpoints_steps=500, keep_checkpoint_max=20, log_step_count_steps=10, ) ws = None if args.ckpt: ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=args.ckpt, vars_to_warm_start=".*") # Create TensorFlow estimator object estimator_obj = VadEstimator(params) estimator = tf.estimator.Estimator( model_fn=estimator_obj.model_fn, model_dir=save_dir, config=train_config, params=params, warm_start_from=ws, ) mode_keys = { "train": tf.estimator.ModeKeys.TRAIN, "eval": tf.estimator.ModeKeys.EVAL, "predict": tf.estimator.ModeKeys.PREDICT, } mode = mode_keys[args.mode] # Training & Evaluation on Train / Val set if mode == tf.estimator.ModeKeys.TRAIN: train_input_fn = data_input_fn( tfrecords_train, batch_size=params["batch_size"], epochs=1, input_size=args.input_size, n_classes=params["n_classes"], subsample=args.subsample, 
shuffle=True, fake_input=args.fake_input, ) eval_input_fn = data_input_fn( tfrecords_val, batch_size=params["batch_size"], epochs=1, input_size=args.input_size, n_classes=params["n_classes"], subsample=args.subsample, shuffle=False, fake_input=args.fake_input, ) for epoch_num in range(params["epochs"]): logger.info(f"Training for epoch {epoch_num} ...") estimator.train(input_fn=train_input_fn) logger.info(f"Evaluation for epoch {epoch_num} ...") estimator.evaluate(input_fn=eval_input_fn) # Evaluation on Test set elif mode == tf.estimator.ModeKeys.EVAL: test_input_fn = data_input_fn( tfrecords_val, batch_size=params["batch_size"], epochs=1, input_size=args.input_size, n_classes=params["n_classes"], subsample=args.subsample, shuffle=False, fake_input=args.fake_input, ) logger.info("Evaluation of test set ...") estimator.evaluate(input_fn=test_input_fn) # Prediction visualization on Test set elif mode == tf.estimator.ModeKeys.PREDICT: test_input_fn = data_input_fn( tfrecords_test, batch_size=params["batch_size"], epochs=1, input_size=args.input_size, n_classes=params["n_classes"], subsample=args.subsample, shuffle=False, fake_input=args.fake_input, ) classes = ["Noise", "Speech"] predictions = estimator.predict(input_fn=test_input_fn) for n, pred in enumerate(predictions): signal_input = pred["signal_input"] pred = pred["speech"] # Plot signal (uncomment # 'signal_input': features['signal_input'] in estimator.py) sns.set() sns.lineplot( x=[i for i in range(len(signal_input[:, 0]))], y=signal_input[:, 0], ) plt.title(f"Signal = {classes[int(np.round(pred))]}") plt.xlabel("Time (num. points)") plt.ylabel("Amplitude") plt.show()