def test_update_state_with_no_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertEqual(self.evaluate(metric.result()), 6 / 8.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 50% accuracy (positions 0 and 1 are correct).
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 8 / 12.0)

def test_update_state_with_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 5 / 7.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 33% accuracy on unmasked tokens.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 6 / 10.0)

def test_constructor_no_masked_token(self):
  metric_name = 'my_test_metric'
  metric = keras_metrics.MaskedCategoricalAccuracy(name=metric_name)
  self.assertIsInstance(metric, tf.keras.metrics.Metric)
  self.assertEqual(metric.name, metric_name)
  self.assertAllEqual(metric.get_config()['masked_tokens'], [])
  self.assertEqual(self.evaluate(metric.result()), 0.0)

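# A hedged sketch of an additional check (not in the original suite): if
# `MaskedCategoricalAccuracy` inherits the standard Keras `from_config`
# classmethod, the config shown above should round-trip. The test name and
# the round-trip assumption are hypothetical.
def test_constructor_round_trips_through_config(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(
      name='round_trip_metric', masked_tokens=[4])
  reconstructed = keras_metrics.MaskedCategoricalAccuracy.from_config(
      metric.get_config())
  self.assertEqual(reconstructed.name, 'round_trip_metric')
  self.assertAllEqual(reconstructed.get_config()['masked_tokens'], [4])
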
def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    sequence_length: Optional[int] = 80,
                    max_batches: Optional[int] = None):
  """Trains a two-layer RNN on Shakespeare next-character prediction.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    sequence_length: The sequence length used for Shakespeare preprocessing.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
  """
  train_dataset, eval_dataset = shakespeare_dataset.get_centralized_datasets(
      train_batch_size=batch_size,
      max_train_batches=max_batches,
      max_test_batches=max_batches,
      sequence_length=sequence_length)

  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  model = shakespeare_models.create_recurrent_model(
      vocab_size=VOCAB_SIZE, sequence_length=sequence_length)
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token])
      ])

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=train_dataset,
      validation_dataset=eval_dataset,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      num_epochs=num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=decay_epochs,
      lr_decay=lr_decay)

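# Hypothetical usage sketch (not part of the original module): a direct call
# to `run_centralized` with a plain SGD optimizer and the documented defaults.
# The function name, experiment name, and output directory are placeholders.
def run_sgd_baseline():
  run_centralized(
      optimizer=tf.keras.optimizers.SGD(learning_rate=0.1),
      experiment_name='shakespeare_sgd_baseline',
      root_output_dir='/tmp/centralized_shakespeare',
      num_epochs=10,
      batch_size=32,
      decay_epochs=5,
      lr_decay=0.1)
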
def metrics_builder():
  """Returns a `list` of `tf.keras.metrics.Metric` objects."""
  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  return [
      keras_metrics.NumBatchesCounter(),
      keras_metrics.NumExamplesCounter(),
      keras_metrics.NumTokensCounter(masked_tokens=[pad_token]),
      keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token]),
  ]

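# A minimal sketch of how `metrics_builder` is typically consumed (assumed
# usage, with model construction elided): the returned list plugs directly
# into `tf.keras.Model.compile`. The helper name below is hypothetical.
def compile_with_metrics(model, optimizer):
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=metrics_builder())
  return model
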
def test_weighted_update_state_with_scalar_weight(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4]],
      y_pred=[
          # A batch with 50% accuracy.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=1.0)
  self.assertAllClose(self.evaluate(metric.result()), 0.5)

def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  train_client_data, test_client_data = (
      tff.simulation.datasets.shakespeare.load_data())

  def preprocess(ds):
    return shakespeare_dataset.convert_snippets_to_character_sequence_examples(
        dataset=ds,
        batch_size=FLAGS.batch_size,
        epochs=1,
        shuffle_buffer_size=0,
        sequence_length=FLAGS.shakespeare_sequence_length)

  train_dataset = train_client_data.create_tf_dataset_from_all_clients()
  if FLAGS.shuffle_train_data:
    train_dataset = train_dataset.shuffle(buffer_size=10000)
  train_dataset = preprocess(train_dataset)

  eval_dataset = preprocess(
      test_client_data.create_tf_dataset_from_all_clients())

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  pad_token, _, _, _ = shakespeare_dataset.get_special_tokens()

  model = shakespeare_models.create_recurrent_model(
      vocab_size=VOCAB_SIZE,
      sequence_length=FLAGS.shakespeare_sequence_length)
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token])
      ])

  hparams_dict = collections.OrderedDict(
      [(name, FLAGS[name].value) for name in hparam_flags])

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=train_dataset,
      validation_dataset=eval_dataset,
      experiment_name=FLAGS.experiment_name,
      root_output_dir=FLAGS.root_output_dir,
      num_epochs=FLAGS.num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=FLAGS.decay_epochs,
      lr_decay=FLAGS.lr_decay)

def test_update_state_with_all_tokens_masked(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[1, 2, 3, 4])
  metric.update_state(
      # All tokens in both batches should be masked.
      y_true=[[1, 2, 3, 4], [4, 3, 2, 1]],
      y_pred=[
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 0.0)

def test_update_state_with_multiple_tokens_masked(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[1, 2, 3, 4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # This batch should be masked entirely.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ])
  self.assertAllClose(self.evaluate(metric.result()), 0.5)

def test_weighted_update_state_special_character_rank_2_sample_weight(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
  self.assertAllClose(self.evaluate(metric.result()), (6 + 2) / 10.0)

def test_weighted_update_state_no_special_character(self):
  metric = keras_metrics.MaskedCategoricalAccuracy()
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (6 + 4) / 12.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 50% accuracy (positions 0 and 1 are correct).
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=[1.0, 1.0, 2.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (6 + 4 + 2) / 18.0)

def test_weighted_update_state_with_masked_token(self):
  metric = keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[4])
  metric.update_state(
      y_true=[[1, 2, 3, 4], [0, 0, 0, 0]],
      y_pred=[
          # A batch with 100% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.9, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
          # A batch with 50% accuracy.
          [
              [0.1, 0.9, 0.1, 0.1, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.9, 0.1, 0.1, 0.1, 0.0],
          ],
      ],
      # A weight for each `y_true` scalar.
      sample_weight=[[1.0, 2.0, 1.0, 2.0], [1.0, 2.0, 1.0, 2.0]])
  self.assertAllClose(self.evaluate(metric.result()), (4 + 4) / 10.0)
  metric.update_state(
      y_true=[[0, 4, 1, 2]],
      y_pred=[
          # A batch with 33% accuracy on unmasked tokens: only position 0 is
          # correct, and position 1 (token 4) is masked.
          [
              [0.9, 0.1, 0.1, 0.1, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
              [0.1, 0.1, 0.1, 0.9, 0.1],
              [0.1, 0.1, 0.1, 0.1, 0.9],
          ],
      ],
      sample_weight=[1.0, 1.0, 2.0, 2.0])
  self.assertAllClose(self.evaluate(metric.result()), (4 + 4 + 1) / 15.0)

def run_centralized(optimizer: tf.keras.optimizers.Optimizer,
                    experiment_name: str,
                    root_output_dir: str,
                    num_epochs: int,
                    batch_size: int,
                    decay_epochs: Optional[int] = None,
                    lr_decay: Optional[float] = None,
                    hparams_dict: Optional[Mapping[str, Any]] = None,
                    vocab_size: Optional[int] = 10000,
                    num_oov_buckets: Optional[int] = 1,
                    sequence_length: Optional[int] = 20,
                    num_validation_examples: Optional[int] = 10000,
                    embedding_size: Optional[int] = 96,
                    latent_size: Optional[int] = 670,
                    num_layers: Optional[int] = 1,
                    shared_embedding: Optional[bool] = False,
                    max_batches: Optional[int] = None):
  """Trains an RNN on the Stack Overflow next-word prediction task.

  Args:
    optimizer: A `tf.keras.optimizers.Optimizer` used to perform training.
    experiment_name: The name of the experiment. Part of the output directory.
    root_output_dir: The top-level output directory for experiment runs. The
      `experiment_name` argument will be appended, and the directory will
      contain tensorboard logs, metrics written as CSVs, and a CSV of
      hyperparameter choices (if `hparams_dict` is used).
    num_epochs: The number of training epochs.
    batch_size: The batch size, used for train, validation, and test.
    decay_epochs: The number of epochs of training before decaying the
      learning rate. If None, no decay occurs.
    lr_decay: The amount to decay the learning rate by after `decay_epochs`
      training epochs have occurred.
    hparams_dict: A mapping with string keys representing the hyperparameters
      and their values. If not None, this is written to CSV.
    vocab_size: Integer dictating the number of most frequent words to use in
      the vocabulary.
    num_oov_buckets: The number of out-of-vocabulary buckets to use.
    sequence_length: The maximum number of words to take for each sequence.
    num_validation_examples: The number of test examples to use for validation.
    embedding_size: The dimension of the word embedding layer.
    latent_size: The dimension of the latent units in the recurrent layers.
    num_layers: The number of stacked recurrent layers to use.
    shared_embedding: Boolean indicating whether to tie input and output
      embeddings.
    max_batches: If set to a positive integer, datasets are capped to at most
      that many batches. If set to None or a nonpositive integer, the full
      datasets are used.
  """
  train_dataset, validation_dataset, test_dataset = (
      stackoverflow_dataset.get_centralized_datasets(
          vocab_size=vocab_size,
          max_seq_len=sequence_length,
          train_batch_size=batch_size,
          max_train_batches=max_batches,
          max_validation_batches=max_batches,
          max_test_batches=max_batches,
          num_validation_examples=num_validation_examples,
          num_oov_buckets=num_oov_buckets))

  model = stackoverflow_models.create_recurrent_model(
      vocab_size=vocab_size,
      num_oov_buckets=num_oov_buckets,
      name='stackoverflow-lstm',
      embedding_size=embedding_size,
      latent_size=latent_size,
      num_layers=num_layers,
      shared_embedding=shared_embedding)

  special_tokens = stackoverflow_dataset.get_special_tokens(
      vocab_size=vocab_size, num_oov_buckets=num_oov_buckets)
  pad_token = special_tokens.pad
  oov_tokens = special_tokens.oov
  eos_token = special_tokens.eos

  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      optimizer=optimizer,
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_with_oov', masked_tokens=[pad_token]),
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_no_oov', masked_tokens=[pad_token] + oov_tokens),
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_no_oov_or_eos',
              masked_tokens=[pad_token, eos_token] + oov_tokens),
      ])

  centralized_training_loop.run(
      keras_model=model,
      train_dataset=train_dataset,
      validation_dataset=validation_dataset,
      test_dataset=test_dataset,
      experiment_name=experiment_name,
      root_output_dir=root_output_dir,
      num_epochs=num_epochs,
      hparams_dict=hparams_dict,
      decay_epochs=decay_epochs,
      lr_decay=lr_decay)

def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  experiment_output_dir = FLAGS.root_output_dir
  tensorboard_dir = os.path.join(experiment_output_dir, 'logdir',
                                 FLAGS.experiment_name)
  results_dir = os.path.join(experiment_output_dir, 'results',
                             FLAGS.experiment_name)

  for path in [experiment_output_dir, tensorboard_dir, results_dir]:
    try:
      tf.io.gfile.makedirs(path)
    except tf.errors.OpError:
      pass  # Directory already exists.

  hparam_dict = collections.OrderedDict(
      [(name, FLAGS[name].value) for name in hparam_flags])
  hparam_dict['results_file'] = results_dir
  hparams_file = os.path.join(results_dir, 'hparams.csv')
  logging.info('Saving hyperparameters to: [%s]', hparams_file)
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  train_client_data, test_client_data = (
      tff.simulation.datasets.shakespeare.load_data())

  def preprocess(ds):
    return dataset.convert_snippets_to_character_sequence_examples(
        ds, FLAGS.batch_size, epochs=1).cache()

  train_dataset = train_client_data.create_tf_dataset_from_all_clients()
  if FLAGS.shuffle_train_data:
    train_dataset = train_dataset.shuffle(buffer_size=10000)
  train_dataset = preprocess(train_dataset)

  eval_dataset = preprocess(
      test_client_data.create_tf_dataset_from_all_clients())

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  pad_token, _, _, _ = dataset.get_special_tokens()

  # Vocabulary with one OOV ID and zero for the mask.
  vocab_size = len(dataset.CHAR_VOCAB) + 2
  model = models.create_recurrent_model(
      vocab_size=vocab_size, batch_size=FLAGS.batch_size)
  model.compile(
      optimizer=optimizer,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(masked_tokens=[pad_token])
      ])

  logging.info('Training model:')
  # `Model.summary()` prints to stdout and returns None; route it to logging.
  model.summary(print_fn=logging.info)

  csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
  tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=tensorboard_dir)

  # Reduce the learning rate by a factor of 10 every 20 epochs.
  def decay_lr(epoch, lr):
    if (epoch + 1) % 20 == 0:
      return lr * 0.1
    else:
      return lr

  lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)

  history = model.fit(
      train_dataset,
      validation_data=eval_dataset,
      epochs=FLAGS.num_epochs,
      callbacks=[lr_callback, tensorboard_callback, csv_logger_callback])

  logging.info('Final metrics:')
  for name in ['loss', 'accuracy']:
    metric = history.history['val_{}'.format(name)][-1]
    logging.info('\t%s: %.4f', name, metric)

def run_experiment():
  """Runs the training experiment."""
  _, validation_dataset, test_dataset = dataset.construct_word_level_datasets(
      FLAGS.vocab_size, FLAGS.batch_size, 1, FLAGS.sequence_length, -1,
      FLAGS.num_validation_examples)
  train_dataset = dataset.get_centralized_train_dataset(
      FLAGS.vocab_size, FLAGS.batch_size, FLAGS.sequence_length,
      FLAGS.shuffle_buffer_size)

  model = models.create_recurrent_model(
      vocab_size=FLAGS.vocab_size,
      name='stackoverflow-lstm',
      embedding_size=FLAGS.embedding_size,
      latent_size=FLAGS.latent_size,
      num_layers=FLAGS.num_layers,
      shared_embedding=FLAGS.shared_embedding)

  logging.info('Training model:')
  # `Model.summary()` prints to stdout and returns None; route it to logging.
  model.summary(print_fn=logging.info)

  optimizer = optimizer_utils.create_optimizer_fn_from_flags('centralized')()

  # The vocabulary is extended with 4 special tokens: pad, oov, bos, eos.
  pad_token, oov_token, _, eos_token = dataset.get_special_tokens(
      FLAGS.vocab_size)
  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      optimizer=optimizer,
      metrics=[
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_with_oov', masked_tokens=[pad_token]),
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_no_oov', masked_tokens=[pad_token, oov_token]),
          keras_metrics.MaskedCategoricalAccuracy(
              name='accuracy_no_oov_or_eos',
              masked_tokens=[pad_token, oov_token, eos_token]),
      ])

  train_results_path = os.path.join(FLAGS.root_output_dir, 'train_results',
                                    FLAGS.experiment_name)
  test_results_path = os.path.join(FLAGS.root_output_dir, 'test_results',
                                   FLAGS.experiment_name)

  train_csv_logger = keras_callbacks.AtomicCSVLogger(train_results_path)
  test_csv_logger = keras_callbacks.AtomicCSVLogger(test_results_path)

  log_dir = os.path.join(FLAGS.root_output_dir, 'logdir',
                         FLAGS.experiment_name)
  try:
    tf.io.gfile.makedirs(log_dir)
    tf.io.gfile.makedirs(train_results_path)
    tf.io.gfile.makedirs(test_results_path)
  except tf.errors.OpError:
    pass  # Directories already exist.

  train_tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir,
      write_graph=True,
      update_freq=FLAGS.tensorboard_update_frequency)
  test_tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

  # Write the hyperparameters to a CSV.
  hparam_dict = collections.OrderedDict(
      [(name, FLAGS[name].value) for name in hparam_flags])
  hparams_file = os.path.join(FLAGS.root_output_dir, FLAGS.experiment_name,
                              'hparams.csv')
  utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)

  model.fit(
      train_dataset,
      epochs=FLAGS.epochs,
      verbose=0,
      validation_data=validation_dataset,
      callbacks=[train_csv_logger, train_tensorboard_callback])
  score = model.evaluate(
      test_dataset,
      verbose=0,
      callbacks=[test_csv_logger, test_tensorboard_callback])
  logging.info('Final test loss: %.4f', score[0])
  logging.info('Final test accuracy: %.4f', score[1])