def test_df_pipeline(self):
  samples_key = 'sample'
  model_output_key = 'embedding'
  model_output_dim = 512
  saved_model_fn_ = get_data.savedmodel_to_func(
      hub.load(HUB_HANDLE_), output_key=model_output_key)
  ds = tf.data.Dataset.from_tensors({
      samples_key:
          tf.sparse.SparseTensor(
              indices=[[0, 0]], values=[1.0], dense_shape=[1, 32000])
  }).repeat()
  min_length = 15360  # 960 ms at 16 kHz.
  batch_size = 3
  ds = get_data.tf_data_pipeline(ds, saved_model_fn_, samples_key, min_length,
                                 batch_size, model_output_dim)
  for i, (wav_samples, embeddings) in enumerate(ds):
    wav_samples.shape.assert_is_compatible_with([batch_size, min_length])
    embeddings.shape.assert_is_compatible_with([batch_size, model_output_dim])
    if i > 2:
      break
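# For reference, a minimal sketch of what a pipeline like
# `get_data.tf_data_pipeline` could look like, given the shapes the test above
# asserts. This is hypothetical: the real implementation lives in `get_data`
# and may differ. It assumes densifying sparse samples, filtering and trimming
# to `min_length`, batching, then mapping the teacher to produce targets.
def _sketch_tf_data_pipeline(ds, teacher_fn, samples_key, min_length,
                             batch_size, output_dim):
  """Hypothetical (samples, teacher embedding) pipeline for illustration."""

  def _densify(record):
    samples = tf.sparse.to_dense(record[samples_key])  # [1, T]
    return tf.squeeze(samples, axis=0)  # [T]

  def _pair_with_target(samples):
    target = teacher_fn(samples)
    target.shape.assert_is_compatible_with([None, output_dim])
    return samples, target

  ds = ds.map(_densify, num_parallel_calls=tf.data.AUTOTUNE)
  ds = ds.filter(lambda s: tf.shape(s)[0] >= min_length)
  ds = ds.map(lambda s: s[:min_length])
  ds = ds.batch(batch_size, drop_remainder=True)
  # Teacher embeddings become the regression targets for distillation.
  return ds.map(_pair_with_target, num_parallel_calls=tf.data.AUTOTUNE)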
def train_and_report(debug=False):
  """Trains the classifier."""
  logging.info('Logdir: %s', FLAGS.logdir)
  logging.info('Batch size: %s', FLAGS.train_batch_size)

  reader = tf.data.TFRecordDataset
  if FLAGS.precomputed_frontend_and_targets:
    ds = get_data.get_precomputed_data(
        file_pattern=FLAGS.file_pattern,
        output_dimension=FLAGS.output_dimension,
        frontend_key=FLAGS.frontend_key,
        target_key=FLAGS.target_key,
        batch_size=FLAGS.train_batch_size,
        num_epochs=FLAGS.num_epochs,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size)
    ds.element_spec[0].shape.assert_has_rank(3)  # log Mel spectrograms
    ds.element_spec[1].shape.assert_has_rank(2)  # teacher embeddings
  else:
    ds = get_data.get_data(
        file_pattern=FLAGS.file_pattern,
        teacher_fn=get_data.savedmodel_to_func(
            hub.load(FLAGS.teacher_model_hub), FLAGS.output_key),
        output_dimension=FLAGS.output_dimension,
        reader=reader,
        samples_key=FLAGS.samples_key,
        min_length=FLAGS.min_length,
        batch_size=FLAGS.train_batch_size,
        loop_forever=True,
        shuffle=True,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size)
    assert len(ds.element_spec) == 2, ds.element_spec
    ds.element_spec[0].shape.assert_has_rank(2)  # audio samples
    ds.element_spec[1].shape.assert_has_rank(2)  # teacher embeddings
  output_dimension = ds.element_spec[1].shape[1]
  assert output_dimension == FLAGS.output_dimension

  # Define loss and optimizer hyperparameters.
  loss_obj = tf.keras.losses.MeanSquaredError(name='mse_loss')
  opt = tf.keras.optimizers.Adam(
      learning_rate=FLAGS.lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
  global_step = opt.iterations

  # Create model, loss, and other objects.
  compressor = None
  if FLAGS.compression_op:
    custom_params = ','.join([
        'compression_frequency=%d',
        'rank=%d',
        'begin_compression_step=%d',
        'end_compression_step=%d',
        'alpha_decrement_value=%d',
    ]) % (FLAGS.comp_freq, FLAGS.comp_rank, FLAGS.comp_begin_step,
          FLAGS.comp_end_step, FLAGS.alpha_step_size)
    compression_params = compression.CompressionOp.get_default_hparams().parse(
        custom_params)
    compressor = compression_wrapper.get_apply_compression(
        compression_params, global_step=global_step)
  model = models.get_keras_model(
      bottleneck_dimension=FLAGS.bottleneck_dimension,
      output_dimension=output_dimension,
      alpha=FLAGS.alpha,
      mobilenet_size=FLAGS.mobilenet_size,
      frontend=not FLAGS.precomputed_frontend_and_targets,
      avg_pool=FLAGS.average_pool,
      compressor=compressor,
      quantize_aware_training=FLAGS.quantize_aware_training)
  model.summary()

  # Add additional metrics to track.
  train_loss = tf.keras.metrics.MeanSquaredError(name='train_loss')
  train_mae = tf.keras.metrics.MeanAbsoluteError(name='train_mae')
  summary_writer = tf.summary.create_file_writer(FLAGS.logdir)
  train_step = get_train_step(
      model, loss_obj, opt, train_loss, train_mae, summary_writer)
  checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
  manager = tf.train.CheckpointManager(
      checkpoint, FLAGS.logdir, max_to_keep=FLAGS.checkpoint_max_to_keep)
  logging.info('Checkpoint prefix: %s', FLAGS.logdir)
  checkpoint.restore(manager.latest_checkpoint)

  if debug:
    return

  for inputs, targets in ds:
    if FLAGS.precomputed_frontend_and_targets:  # Inputs are spectrograms.
      inputs.shape.assert_has_rank(3)
      inputs.shape.assert_is_compatible_with([FLAGS.train_batch_size, 96, 64])
    else:  # Inputs are audio vectors.
      inputs.shape.assert_has_rank(2)
      inputs.shape.assert_is_compatible_with(
          [FLAGS.train_batch_size, FLAGS.min_length])
    targets.shape.assert_has_rank(2)
    targets.shape.assert_is_compatible_with(
        [FLAGS.train_batch_size, FLAGS.output_dimension])
    train_step(inputs, targets, global_step)

    # Optional print output and save model.
    if global_step % 10 == 0:
      logging.info('step: %i, train loss: %f, train mean abs error: %f',
                   global_step, train_loss.result(), train_mae.result())
    if global_step % FLAGS.measurement_store_interval == 0:
      manager.save(checkpoint_number=global_step)

  manager.save(checkpoint_number=global_step)
  logging.info('Finished training.')
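# `get_train_step` is defined elsewhere in this file; the sketch below shows a
# plausible shape for it, assuming a standard `tf.GradientTape` update on the
# distillation MSE loss. The output key 'embedding_to_target' matches the eval
# code; other details (summary name, metric updates) are illustrative only.
def _sketch_get_train_step(model, loss_obj, opt, train_loss, train_mae,
                           summary_writer):
  """Hypothetical train step: one gradient update on the student model."""

  @tf.function
  def train_step(inputs, targets, step):
    with tf.GradientTape() as tape:
      predictions = model(inputs, training=True)['embedding_to_target']
      loss = loss_obj(y_true=targets, y_pred=predictions)
    grads = tape.gradient(loss, model.trainable_variables)
    # Applying gradients also increments `opt.iterations`, i.e. `global_step`.
    opt.apply_gradients(zip(grads, model.trainable_variables))
    # Track running metrics and write a scalar summary at this step.
    train_loss.update_state(targets, predictions)
    train_mae.update_state(targets, predictions)
    with summary_writer.as_default():
      tf.summary.scalar('mse_loss', loss, step=step)

  return train_step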
def test_savedmodel_to_func(self):
  get_data.savedmodel_to_func(hub.load(HUB_HANDLE_), output_key='embedding')
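# A plausible sketch of the wrapper under test, assuming `savedmodel_to_func`
# turns a loaded hub SavedModel into a plain callable that selects one named
# output. The actual implementation is in `get_data`; the loaded module's call
# signature (e.g. extra sample-rate arguments) may differ.
def _sketch_savedmodel_to_func(saved_model, output_key):
  """Hypothetical: wraps a SavedModel to map samples -> one named output."""

  def _func(samples):
    outputs = saved_model(samples)  # Assumed to be a dict of named tensors.
    return outputs[output_key]

  return _func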
def eval_and_report():
  """Eval on voxceleb."""
  logging.info('samples_key: %s', FLAGS.samples_key)
  logging.info('Logdir: %s', FLAGS.logdir)
  logging.info('Batch size: %s', FLAGS.batch_size)

  writer = tf.summary.create_file_writer(FLAGS.eval_dir)
  model = models.get_keras_model(
      bottleneck_dimension=FLAGS.bottleneck_dimension,
      output_dimension=FLAGS.output_dimension,
      alpha=FLAGS.alpha,
      mobilenet_size=FLAGS.mobilenet_size,
      frontend=not FLAGS.precomputed_frontend_and_targets,
      avg_pool=FLAGS.average_pool)
  checkpoint = tf.train.Checkpoint(model=model)

  for ckpt in tf.train.checkpoints_iterator(
      FLAGS.logdir, timeout=FLAGS.timeout):
    assert 'ckpt-' in ckpt, ckpt
    step = ckpt.split('ckpt-')[-1]
    logging.info('Starting to evaluate step: %s.', step)

    checkpoint.restore(ckpt)
    logging.info('Loaded weights for eval step: %s.', step)

    reader = tf.data.TFRecordDataset
    ds = get_data.get_data(
        file_pattern=FLAGS.file_pattern,
        teacher_fn=get_data.savedmodel_to_func(
            hub.load(FLAGS.teacher_model_hub), FLAGS.output_key),
        output_dimension=FLAGS.output_dimension,
        reader=reader,
        samples_key=FLAGS.samples_key,
        min_length=FLAGS.min_length,
        batch_size=FLAGS.batch_size,
        loop_forever=False,
        shuffle=False)
    logging.info('Got dataset for eval step: %s.', step)
    if FLAGS.take_fixed_data:
      ds = ds.take(FLAGS.take_fixed_data)

    mse_m = tf.keras.metrics.MeanSquaredError()
    mae_m = tf.keras.metrics.MeanAbsoluteError()

    logging.info('Starting the ds loop...')
    count, ex_count = 0, 0
    s = time.time()
    for wav_samples, targets in ds:
      wav_samples.shape.assert_is_compatible_with([None, FLAGS.min_length])
      targets.shape.assert_is_compatible_with([None, FLAGS.output_dimension])

      logits = model(wav_samples, training=False)['embedding_to_target']
      logits.shape.assert_is_compatible_with(targets.shape)

      mse_m.update_state(y_true=targets, y_pred=logits)
      mae_m.update_state(y_true=targets, y_pred=logits)
      ex_count += logits.shape[0]
      count += 1
      logging.info('Saw %i examples after %i iterations in %.2f secs...',
                   ex_count, count, time.time() - s)
    with writer.as_default():
      tf.summary.scalar('mse', mse_m.result().numpy(), step=int(step))
      tf.summary.scalar('mae', mae_m.result().numpy(), step=int(step))
    logging.info('Done with eval step: %s in %.2f secs.', step,
                 time.time() - s)
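# For clarity on the `step` parsing above: `tf.train.checkpoints_iterator`
# yields checkpoint prefixes like '<logdir>/ckpt-1234' (the default
# `CheckpointManager` naming used in training), so splitting on 'ckpt-'
# recovers the global step for the summary `step` argument. The path below is
# illustrative only.
_example_ckpt = '/tmp/logdir/ckpt-1234'
assert _example_ckpt.split('ckpt-')[-1] == '1234'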
def train_and_report(debug=False):
  """Trains the classifier."""
  logging.info('Logdir: %s', FLAGS.logdir)
  logging.info('Batch size: %s', FLAGS.train_batch_size)

  reader = tf.data.TFRecordDataset
  target_key = FLAGS.target_key
  if FLAGS.precomputed_targets:
    teacher_fn = None
    assert target_key is not None
    assert FLAGS.output_key is None
  else:
    teacher_fn = get_data.savedmodel_to_func(
        hub.load(FLAGS.teacher_model_hub), FLAGS.output_key)
    assert target_key is None
  ds = get_data.get_data(
      file_patterns=FLAGS.file_patterns,
      output_dimension=FLAGS.output_dimension,
      reader=reader,
      samples_key=FLAGS.samples_key,
      min_length=FLAGS.min_length,
      batch_size=FLAGS.train_batch_size,
      loop_forever=True,
      shuffle=True,
      teacher_fn=teacher_fn,
      target_key=target_key,
      normalize_to_pm_one=FLAGS.normalize_to_pm_one,
      shuffle_buffer_size=FLAGS.shuffle_buffer_size)
  assert len(ds.element_spec) == 2, ds.element_spec
  ds.element_spec[0].shape.assert_has_rank(2)  # audio samples
  ds.element_spec[1].shape.assert_has_rank(2)  # teacher embeddings
  output_dimension = ds.element_spec[1].shape[1]
  assert output_dimension == FLAGS.output_dimension

  # Define loss and optimizer hyperparameters.
  loss_obj = tf.keras.losses.MeanSquaredError(name='mse_loss')
  opt = tf.keras.optimizers.Adam(
      learning_rate=FLAGS.lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
  global_step = opt.iterations

  # Create model, loss, and other objects.
  model = models.get_keras_model(
      model_type=FLAGS.model_type,
      output_dimension=output_dimension,
      truncate_output=FLAGS.truncate_output,
      frontend=True,
      spec_augment=FLAGS.spec_augment)
  model.summary()

  # Add additional metrics to track.
  train_loss = tf.keras.metrics.MeanSquaredError(name='train_loss')
  train_mae = tf.keras.metrics.MeanAbsoluteError(name='train_mae')
  summary_writer = tf.summary.create_file_writer(FLAGS.logdir)
  train_step = get_train_step(
      model, loss_obj, opt, train_loss, train_mae, summary_writer)
  checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
  manager = tf.train.CheckpointManager(
      checkpoint, FLAGS.logdir, max_to_keep=FLAGS.checkpoint_max_to_keep)
  logging.info('Checkpoint prefix: %s', FLAGS.logdir)
  checkpoint.restore(manager.latest_checkpoint)

  if debug:
    return

  for inputs, targets in ds:
    # Inputs are audio vectors.
    inputs.shape.assert_has_rank(2)
    inputs.shape.assert_is_compatible_with(
        [FLAGS.train_batch_size, FLAGS.min_length])
    targets.shape.assert_has_rank(2)
    targets.shape.assert_is_compatible_with(
        [FLAGS.train_batch_size, FLAGS.output_dimension])
    train_step(inputs, targets, global_step)

    # Optional print output and save model.
    if global_step % 10 == 0:
      logging.info('step: %i, train loss: %f, train mean abs error: %f',
                   global_step, train_loss.result(), train_mae.result())
    if global_step % FLAGS.measurement_store_interval == 0:
      manager.save(checkpoint_number=global_step)

  manager.save(checkpoint_number=global_step)
  logging.info('Finished training.')
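# A minimal sketch of how `train_and_report` is typically invoked from an absl
# binary; the real `main` may differ (flag validation, strategy/TPU setup).
def main(unused_argv):
  assert FLAGS.file_patterns
  assert FLAGS.logdir
  train_and_report(debug=False)


if __name__ == '__main__':
  from absl import app  # Assumed import; normally at the top of the file.
  app.run(main)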