def test_get_data(self, precomputed):
    """Checks that `get_data.get_data` yields a correctly shaped first batch.

    Args:
      precomputed: If truthy, read from `self.precomputed_file_pattern` with
        targets looked up under `self.target_key` and no teacher function.
        Otherwise read from `self.file_pattern` and produce targets with a
        trivial all-ones teacher function.
    """
    if precomputed:
        file_pattern = self.precomputed_file_pattern
        teacher_fn = None
        target_key = self.target_key
    else:
        file_pattern = self.file_pattern
        # Trivial function: one row of ones per input example.
        teacher_fn = lambda x: tf.ones(
            [tf.shape(x)[0], self.output_dim], tf.float32)
        target_key = None

    bs = 2
    ds = get_data.get_data(
        file_pattern=file_pattern,
        output_dimension=self.output_dim,
        reader=tf.data.TFRecordDataset,
        samples_key=self.samples_key,
        min_length=self.min_len,
        batch_size=bs,
        loop_forever=False,
        shuffle=True,
        teacher_fn=teacher_fn,
        target_key=target_key,
        shuffle_buffer_size=2)

    # Test that one element of the input pipeline can be successfully read.
    # The element is pulled explicitly (instead of `for ... break`) so that an
    # empty dataset fails the test rather than passing without any checks.
    first_batch = next(iter(ds), None)
    self.assertIsNotNone(first_batch, 'Input pipeline produced no elements.')
    wav_samples, targets = first_batch
    self.assertEqual(wav_samples.shape, [bs, self.min_len])
    self.assertEqual(targets.shape, [bs, self.output_dim])
def train_and_report(debug=False):
    """Trains the model against teacher targets and checkpoints it.

    Builds the input dataset (either precomputed frontend features and
    targets, or raw audio with targets produced by a teacher model loaded via
    `hub.load`), creates the Keras model with an optional compression op,
    restores the latest checkpoint from `FLAGS.logdir`, and then runs the
    training loop, logging every 10 steps and saving a checkpoint every
    `FLAGS.measurement_store_interval` steps and once more at the end.

    Args:
      debug: If true, return right after the dataset, model and checkpoint
        objects are built and the latest checkpoint is restored, without
        running any training steps.
    """
    logging.info('Logdir: %s', FLAGS.logdir)
    logging.info('Batch size: %s', FLAGS.train_batch_size)

    reader = tf.data.TFRecordDataset
    if FLAGS.precomputed_frontend_and_targets:
        ds = get_data.get_precomputed_data(
            file_pattern=FLAGS.file_pattern,
            output_dimension=FLAGS.output_dimension,
            frontend_key=FLAGS.frontend_key,
            target_key=FLAGS.target_key,
            batch_size=FLAGS.train_batch_size,
            num_epochs=FLAGS.num_epochs,
            shuffle_buffer_size=FLAGS.shuffle_buffer_size)
        ds.element_spec[0].shape.assert_has_rank(3)  # log Mel spectrograms
        ds.element_spec[1].shape.assert_has_rank(2)  # teacher embeddings
    else:
        ds = get_data.get_data(
            file_pattern=FLAGS.file_pattern,
            teacher_fn=get_data.savedmodel_to_func(
                hub.load(FLAGS.teacher_model_hub), FLAGS.output_key),
            output_dimension=FLAGS.output_dimension,
            reader=reader,
            samples_key=FLAGS.samples_key,
            min_length=FLAGS.min_length,
            batch_size=FLAGS.train_batch_size,
            loop_forever=True,
            shuffle=True,
            shuffle_buffer_size=FLAGS.shuffle_buffer_size)
        assert len(ds.element_spec) == 2, ds.element_spec
        ds.element_spec[0].shape.assert_has_rank(2)  # audio samples
        ds.element_spec[1].shape.assert_has_rank(2)  # teacher embeddings
    output_dimension = ds.element_spec[1].shape[1]
    assert output_dimension == FLAGS.output_dimension

    # Define loss and optimizer hyperparameters.
    loss_obj = tf.keras.losses.MeanSquaredError(name='mse_loss')
    opt = tf.keras.optimizers.Adam(
        learning_rate=FLAGS.lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    # The optimizer's iteration counter doubles as the global step.
    global_step = opt.iterations

    # Create model, loss, and other objects.
    compressor = None
    if FLAGS.compression_op:
        custom_params = ','.join([
            'compression_frequency=%d',
            'rank=%d',
            'begin_compression_step=%d',
            'end_compression_step=%d',
            # `%s` rather than `%d`: `%d` would truncate a fractional step
            # size (e.g. 0.01 -> 0) before it reaches the hparams parser.
            'alpha_decrement_value=%s',
        ]) % (FLAGS.comp_freq, FLAGS.comp_rank, FLAGS.comp_begin_step,
              FLAGS.comp_end_step, FLAGS.alpha_step_size)
        compression_params = compression.CompressionOp.get_default_hparams(
        ).parse(custom_params)
        compressor = compression_wrapper.get_apply_compression(
            compression_params, global_step=global_step)
    model = models.get_keras_model(
        bottleneck_dimension=FLAGS.bottleneck_dimension,
        output_dimension=output_dimension,
        alpha=FLAGS.alpha,
        mobilenet_size=FLAGS.mobilenet_size,
        # The model only needs its own frontend when fed raw audio.
        frontend=not FLAGS.precomputed_frontend_and_targets,
        avg_pool=FLAGS.average_pool,
        compressor=compressor,
        quantize_aware_training=FLAGS.quantize_aware_training)
    model.summary()

    # Add additional metrics to track.
    train_loss = tf.keras.metrics.MeanSquaredError(name='train_loss')
    train_mae = tf.keras.metrics.MeanAbsoluteError(name='train_mae')
    summary_writer = tf.summary.create_file_writer(FLAGS.logdir)
    train_step = get_train_step(
        model, loss_obj, opt, train_loss, train_mae, summary_writer)
    checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
    manager = tf.train.CheckpointManager(
        checkpoint, FLAGS.logdir, max_to_keep=FLAGS.checkpoint_max_to_keep)
    logging.info('Checkpoint prefix: %s', FLAGS.logdir)
    # Resume from the most recent checkpoint, if there is one.
    checkpoint.restore(manager.latest_checkpoint)

    if debug:
        return
    for inputs, targets in ds:
        if FLAGS.precomputed_frontend_and_targets:
            # Inputs are spectrograms.
            inputs.shape.assert_has_rank(3)
            inputs.shape.assert_is_compatible_with(
                [FLAGS.train_batch_size, 96, 64])
        else:
            # Inputs are audio vectors.
            inputs.shape.assert_has_rank(2)
            inputs.shape.assert_is_compatible_with(
                [FLAGS.train_batch_size, FLAGS.min_length])
        targets.shape.assert_has_rank(2)
        targets.shape.assert_is_compatible_with(
            [FLAGS.train_batch_size, FLAGS.output_dimension])
        train_step(inputs, targets, global_step)
        # Optional print output and save model.
        if global_step % 10 == 0:
            logging.info(
                'step: %i, train loss: %f, train mean abs error: %f',
                global_step, train_loss.result(), train_mae.result())
        if global_step % FLAGS.measurement_store_interval == 0:
            manager.save(checkpoint_number=global_step)

    # Always persist the final state once the dataset is exhausted.
    manager.save(checkpoint_number=global_step)
    logging.info('Finished training.')
def eval_and_report():
    """Evaluates every new checkpoint in `FLAGS.logdir` and writes summaries.

    For each checkpoint yielded by `tf.train.checkpoints_iterator`, restores
    the model weights, runs the model over the evaluation dataset (optionally
    truncated to `FLAGS.take_fixed_data` batches), and writes the mean squared
    error and mean absolute error between the model's `embedding_to_target`
    output and the teacher targets as scalar summaries to `FLAGS.eval_dir`,
    using the checkpoint number as the summary step.
    """
    # Use the same `logging` module as the rest of this function; `tf.logging`
    # is not part of the TF2 API that the rest of this code relies on.
    logging.info('samples_key: %s', FLAGS.samples_key)
    logging.info('Logdir: %s', FLAGS.logdir)
    logging.info('Batch size: %s', FLAGS.batch_size)

    writer = tf.summary.create_file_writer(FLAGS.eval_dir)
    model = models.get_keras_model(
        bottleneck_dimension=FLAGS.bottleneck_dimension,
        output_dimension=FLAGS.output_dimension,
        alpha=FLAGS.alpha,
        mobilenet_size=FLAGS.mobilenet_size,
        frontend=not FLAGS.precomputed_frontend_and_targets,
        avg_pool=FLAGS.average_pool)
    checkpoint = tf.train.Checkpoint(model=model)

    # The reader and the teacher model do not depend on the checkpoint being
    # evaluated, so build them once rather than on every pass.
    reader = tf.data.TFRecordDataset
    teacher_fn = get_data.savedmodel_to_func(
        hub.load(FLAGS.teacher_model_hub), FLAGS.output_key)

    for ckpt in tf.train.checkpoints_iterator(
            FLAGS.logdir, timeout=FLAGS.timeout):
        assert 'ckpt-' in ckpt, ckpt
        # The text after the last 'ckpt-' is used as the step number.
        step = ckpt.split('ckpt-')[-1]
        logging.info('Starting to evaluate step: %s.', step)

        checkpoint.restore(ckpt)

        logging.info('Loaded weights for eval step: %s.', step)

        ds = get_data.get_data(
            file_pattern=FLAGS.file_pattern,
            teacher_fn=teacher_fn,
            output_dimension=FLAGS.output_dimension,
            reader=reader,
            samples_key=FLAGS.samples_key,
            min_length=FLAGS.min_length,
            batch_size=FLAGS.batch_size,
            loop_forever=False,
            shuffle=False)
        logging.info('Got dataset for eval step: %s.', step)
        if FLAGS.take_fixed_data:
            ds = ds.take(FLAGS.take_fixed_data)

        # Fresh metric objects per checkpoint so results do not accumulate
        # across evaluation passes.
        mse_m = tf.keras.metrics.MeanSquaredError()
        mae_m = tf.keras.metrics.MeanAbsoluteError()

        logging.info('Starting the ds loop...')
        count, ex_count = 0, 0
        s = time.time()
        for wav_samples, targets in ds:
            wav_samples.shape.assert_is_compatible_with(
                [None, FLAGS.min_length])
            targets.shape.assert_is_compatible_with(
                [None, FLAGS.output_dimension])

            logits = model(wav_samples, training=False)['embedding_to_target']
            logits.shape.assert_is_compatible_with(targets.shape)

            mse_m.update_state(y_true=targets, y_pred=logits)
            mae_m.update_state(y_true=targets, y_pred=logits)
            ex_count += logits.shape[0]
            count += 1
            logging.info(
                'Saw %i examples after %i iterations as %.2f secs...',
                ex_count, count, time.time() - s)
        with writer.as_default():
            tf.summary.scalar('mse', mse_m.result().numpy(), step=int(step))
            tf.summary.scalar('mae', mae_m.result().numpy(), step=int(step))
        logging.info('Done with eval step: %s in %.2f secs.', step,
                     time.time() - s)
def train_and_report(debug=False):
    """Fits the Keras model to teacher targets, checkpointing as it goes.

    Targets come either from the input records (when
    `FLAGS.precomputed_targets` is set, read under `FLAGS.target_key`) or from
    a teacher model loaded with `hub.load`. The latest checkpoint in
    `FLAGS.logdir` is restored before training. Progress is logged every 10
    steps, and a checkpoint is written every
    `FLAGS.measurement_store_interval` steps and once after the loop ends.

    Args:
      debug: If true, stop after everything is constructed and the latest
        checkpoint is restored; no training step is executed.
    """
    logging.info('Logdir: %s', FLAGS.logdir)
    logging.info('Batch size: %s', FLAGS.train_batch_size)

    # Exactly one of (stored targets, teacher model) supplies the targets.
    target_key = FLAGS.target_key
    if FLAGS.precomputed_targets:
        teacher_fn = None
        assert target_key is not None
        assert FLAGS.output_key is None
    else:
        teacher_fn = get_data.savedmodel_to_func(
            hub.load(FLAGS.teacher_model_hub), FLAGS.output_key)
        assert target_key is None

    data_kwargs = dict(
        file_patterns=FLAGS.file_patterns,
        output_dimension=FLAGS.output_dimension,
        reader=tf.data.TFRecordDataset,
        samples_key=FLAGS.samples_key,
        min_length=FLAGS.min_length,
        batch_size=FLAGS.train_batch_size,
        loop_forever=True,
        shuffle=True,
        teacher_fn=teacher_fn,
        target_key=target_key,
        normalize_to_pm_one=FLAGS.normalize_to_pm_one,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size,
    )
    dataset = get_data.get_data(**data_kwargs)

    # Sanity-check the dataset signature: (audio samples, teacher embeddings).
    assert len(dataset.element_spec) == 2, dataset.element_spec
    samples_spec = dataset.element_spec[0]
    samples_spec.shape.assert_has_rank(2)
    targets_spec = dataset.element_spec[1]
    targets_spec.shape.assert_has_rank(2)
    output_dimension = targets_spec.shape[1]
    assert output_dimension == FLAGS.output_dimension

    # Loss and optimizer hyperparameters.
    mse_loss = tf.keras.losses.MeanSquaredError(name='mse_loss')
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=FLAGS.lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    # The optimizer's own iteration counter serves as the global step.
    global_step = optimizer.iterations

    # Model under training.
    model = models.get_keras_model(
        model_type=FLAGS.model_type,
        output_dimension=output_dimension,
        truncate_output=FLAGS.truncate_output,
        frontend=True,
        spec_augment=FLAGS.spec_augment)
    model.summary()

    # Extra metrics tracked alongside the loss, plus the summary writer.
    loss_metric = tf.keras.metrics.MeanSquaredError(name='train_loss')
    mae_metric = tf.keras.metrics.MeanAbsoluteError(name='train_mae')
    summary_writer = tf.summary.create_file_writer(FLAGS.logdir)
    train_step = get_train_step(
        model, mse_loss, optimizer, loss_metric, mae_metric, summary_writer)

    # Checkpointing: restore the newest checkpoint if one is present.
    checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
    ckpt_manager = tf.train.CheckpointManager(
        checkpoint, FLAGS.logdir, max_to_keep=FLAGS.checkpoint_max_to_keep)
    logging.info('Checkpoint prefix: %s', FLAGS.logdir)
    checkpoint.restore(ckpt_manager.latest_checkpoint)

    if debug:
        return

    expected_inputs_shape = [FLAGS.train_batch_size, FLAGS.min_length]
    expected_targets_shape = [FLAGS.train_batch_size, FLAGS.output_dimension]
    for audio_batch, target_batch in dataset:
        # Inputs are audio vectors.
        audio_batch.shape.assert_has_rank(2)
        audio_batch.shape.assert_is_compatible_with(expected_inputs_shape)
        target_batch.shape.assert_has_rank(2)
        target_batch.shape.assert_is_compatible_with(expected_targets_shape)

        train_step(audio_batch, target_batch, global_step)

        # Periodic progress log and checkpoint.
        if global_step % 10 == 0:
            logging.info(
                'step: %i, train loss: %f, train mean abs error: %f',
                global_step, loss_metric.result(), mae_metric.result())
        if global_step % FLAGS.measurement_store_interval == 0:
            ckpt_manager.save(checkpoint_number=global_step)

    # Final checkpoint once the dataset is exhausted.
    ckpt_manager.save(checkpoint_number=global_step)
    logging.info('Finished training.')