Example No. 1
def load_model(filepath, compile=True, **kwargs):
    logger.debug("Load model from file: {}".format(filepath))
    keras_model = tf.keras.models.load_model(filepath,
                                             compile=compile,
                                             **kwargs)
    # FIXME load models with any type of parallelization strategy
    logger.warning("Loading model with the default `data parallel` strategy.")
    tnt_model = tnt.Model(keras_model,
                          parallel_strategy=tnt.ParallelStrategy.DATA)
    if compile:
        try:
            tnt_optimizer = tnt.distributed_optimizers.SynchDistributedOptimizer(
                keras_model.optimizer, group=tnt_model.group)
            tnt_model.dist_optimizer = tnt_optimizer
            tnt_model._set_internal_optimizer(tnt_model.dist_optimizer)
            tnt_model.compiled = True
            tnt_model.done_broadcast = True

            if version_utils.tf_version_below_equal('2.1'):
                tnt_model.model._experimental_run_tf_function = False
                logger.info("Set `experimental_run_tf_function` to False.")
        except:
            logger.info("The loaded model was not pre-compiled.")
    tnt_model.barrier.execute()
    return tnt_model
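A minimal save/reload round trip that this helper supports (sketch only: the model variable, compile arguments, and directory name below are hypothetical; `mnist.lenet5_model_generator` is the test helper used in later examples):
model = tnt.Model(mnist.lenet5_model_generator())
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.save('saved_lenet5')                        # standard Keras-style save
reloaded = tnt.models.load_model('saved_lenet5')  # returns a compiled, data-parallel tnt.Model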
Example No. 2
 def test_add_metric(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.metrics == ['loss']
     tnt_model.add_metric(tnt_model.output,
                          aggregation='mean',
                          name='metric_name')
     assert len(tnt_model.metrics) == 2
     assert tnt_model.metrics_names == ['loss', 'metric_name']
Example No. 3
 def test_add_metric(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.metrics == []
     tnt_model.add_metric(tnt_model.output,
                          aggregation='mean',
                          name='metric_name')  # deprecated after Tf2.2
     assert len(tnt_model.metrics) == 1
     assert tnt_model.metrics_names == ['metric_name']
Example No. 4
    def test_save_before_compile(self, model, save_setup, parallel_strategy,
                                 check_configuration_identical):
        tnt_model = tnt.Model(model, parallel_strategy)
        tnt_model.save(save_setup['save_dir'],
                       tnt_save_all_devices=save_setup['all_devices'])
        reloaded_tnt_model = tnt.models.load_model(save_setup['save_dir'])

        assert isinstance(reloaded_tnt_model, keras.Model)
        check_configuration_identical(reloaded_tnt_model, tnt_model)
Example No. 5
def clone_model(model, **kwargs):
  if isinstance(model, tnt.Model):
    keras_model = tf.keras.models.clone_model(model.model, **kwargs)
    logger.info("clone model from instance of tnt.Model")
  elif isinstance(model, tf.keras.Model):
    keras_model = tf.keras.models.clone_model(model, **kwargs)
    logger.info("clone model from instance of tf.keras.Model")
  else:
    raise ValueError("[tnt.models.clone_model] `model` needs to be either",
                     "a `tf.keras.Model`, or a `tnt.Model`")
  return tnt.Model(keras_model)
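A short usage sketch (hypothetical variable names) showing that both accepted input types yield a fresh `tnt.Model`:
keras_net = mnist.lenet5_model_generator()             # plain tf.keras.Model
tnt_net = tnt.Model(keras_net)
clone_from_keras = tnt.models.clone_model(keras_net)   # cloning a tf.keras.Model
clone_from_tnt = tnt.models.clone_model(tnt_net)       # cloning a tnt.Model
assert isinstance(clone_from_keras, tnt.Model)
assert isinstance(clone_from_tnt, tnt.Model)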
Example No. 6
 def test_optimizer_with_name(self, optimizer_name, optimizer_type):
     tnt_model = tnt.Model(mnist.lenet5_model_generator(),
                           parallel_strategy=tnt.ParallelStrategy.DATA)
     tnt_model.compile(
         optimizer=optimizer_name,
         loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
         metrics=['accuracy'])
     tnt_optimizer = tnt_model.dist_optimizer
     assert isinstance(tnt_optimizer,
                       tnt.distributed_optimizers.SynchDistributedOptimizer)
     assert isinstance(tnt_optimizer.underlying_optimizer, optimizer_type)
Example No. 7
def model_from_yaml(yaml_string, **kwargs):
    logger.debug("Load model from yaml")
    try:
        keras_model = tf.keras.models.model_from_yaml(yaml_string, **kwargs)
        # FIXME load models with any type of parallelization strategy
        logger.warning(
            "Loading model with the default `data parallel` strategy.")
        return tnt.Model(keras_model,
                         parallel_strategy=tnt.ParallelStrategy.DATA)
    except:
        raise RuntimeError("[tnt.models.model_from_yaml] Cannot load model")
Example No. 8
 def test_metrics_names_after_fit(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                       loss="sparse_categorical_crossentropy",
                       metrics=["sparse_categorical_accuracy"])
     train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                             train_size=24,
                                             train_batch_size=24)
     tnt_model.fit(train_dataset)
     assert tnt_model.metrics_names == [
         "loss", "sparse_categorical_accuracy"
     ]
Example No. 9
    def test_reset_metrics(self):
        tnt_model = tnt.Model(mnist.lenet5_model_generator())
        tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                          loss="sparse_categorical_crossentropy",
                          metrics=["sparse_categorical_accuracy"])
        train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                                train_size=60,
                                                train_batch_size=60)
        tnt_model.fit(train_dataset)
        assert all(float(m.result()) != 0 for m in tnt_model.metrics)

        tnt_model.reset_metrics()
        assert all(float(m.result()) == 0 for m in tnt_model.metrics)
Example No. 10
def clone_model(model, **kwargs):
    if isinstance(model, tnt.strategy.parallel_model.ParallelModel):
        keras_model = tf.keras.models.clone_model(model.model, **kwargs)
        logger.info("clone model from instance of tnt.Model")
    elif isinstance(model, tf.keras.Model):
        keras_model = tf.keras.models.clone_model(model, **kwargs)
        logger.info("clone model from instance of tf.keras.Model")
    else:
        raise ValueError("[tnt.models.clone_model] `model` needs to be either",
                         "a `tf.keras.Model`, or a `tnt.Model`")
    # FIXME load models with any type of parallelization strategy
    logger.warning("Loading model with the default `data parallel` strategy.")
    return tnt.Model(keras_model, parallel_strategy=tnt.ParallelStrategy.DATA)
Example No. 11
 def from_config(cls, config, **kwargs):
     try:
         keras_model = tf.keras.Sequential.from_config(config, **kwargs)
         logger.info("Loaded model from `keras.Sequential`.")
     except:
         raise RuntimeError(
             """[tnt.keras.Sequential.from_config] Cannot load
         model; provided configuration is not a `keras.Sequential` model."""
         )
     # FIXME load models with any type of parallelization strategy
     logger.warning(
         "Loading model with the default `data parallel` strategy.")
     return tnt.Model(keras_model,
                      parallel_strategy=tnt.ParallelStrategy.DATA)
Example No. 12
  def test_cifar_alexnet(self, keras_model, optimizer, micro_batch_size, nbatches, ntest_batches):
    batch_size = micro_batch_size * tnt.get_size()
    nsamples = nbatches * batch_size
    (number_epochs, lr) = cifar.get_hyperparams(optimizer)
    (train_dataset, test_dataset) = util.load_train_test_dataset(cifar.load_cifar_dataset,
                                                                 train_size = nsamples,
                                                                 train_batch_size = batch_size,
                                                                 test_size = ntest_batches * batch_size,
                                                                 test_batch_size = batch_size)
    if optimizer.__name__ == 'SGD':
      keras_optimizer = optimizer(learning_rate=lr, momentum=0.9)
    else:
      keras_optimizer = optimizer(learning_rate=lr)

    model = tnt.Model(keras_model())
    model.compile(keras_optimizer,
                  loss = keras.losses.SparseCategoricalCrossentropy(),
                  metrics = [keras.metrics.SparseCategoricalAccuracy()])
    model.fit(train_dataset,
              epochs = number_epochs,
              verbose = 0)
    results = model.evaluate(test_dataset)
    util.check_accuracy_greater(results[1], 0.5)
Example No. 13
  def __init__(self, flags_obj):
    """Init function of TransformerMain.

    Args:
      flags_obj: Object containing parsed flag values, i.e., FLAGS.
    """
    self.flags_obj = flags_obj
    self.params = tnt_misc.get_model_params(flags_obj.param_set)
    self.params["train_epochs"] = flags_obj.train_epochs
    self.params["epochs_between_evals"] = flags_obj.epochs_between_evals
    self.params["num_sentences"] = flags_obj.num_sentences
    self.params["num_eval_sentences"] = flags_obj.num_eval_sentences
    self.params["batch_size"] = flags_obj.batch_size or self.params["default_batch_size"]

    self.params["data_dir"] = flags_obj.data_dir
    self.params["vocab_size"] = flags_obj.vocab_size or self.params["vocab_size"]
    self.params["max_length"] = flags_obj.max_length
    self.params["decode_batch_size"] = flags_obj.decode_batch_size
    self.params["decode_max_length"] = flags_obj.decode_max_length
    self.params["max_io_parallelism"] = (
        flags_obj.num_parallel_calls or tf.data.experimental.AUTOTUNE)

    self.params["use_synthetic_data"] = flags_obj.use_synthetic_data
    self.params["dtype"] = tf.float32

    # Transformer model used both as Tarantella model (in training) and as a serial
    # model for inference
    internal_model = transformer.Transformer(self.params, name="transformer_v2")

    # The train model includes an additional logits layer and a customized loss
    self.train_model = create_model(internal_model, self.params, is_train = True)
    # Enable distributed training
    self.train_model = tnt.Model(self.train_model)

    # The inference model is wrapped as a different Keras model that does not use labels
    self.predict_model = create_model(internal_model, self.params, is_train = False)
Example No. 14
 def test_clone_tnt_model(self, keras_model, parallel_strategy):
     tnt_model = tnt.Model(keras_model, parallel_strategy)
     cloned_model = tnt.models.clone_model(tnt_model)
     util.check_model_configuration_identical(tnt_model, cloned_model)
Example No. 15
 def test_layers(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert len(tnt_model.layers) == 8
     assert tnt_model.layers[1].name == 'conv1'
Example No. 16
def model_from_yaml(yaml_string, **kwargs):
  keras_model = tf.keras.models.model_from_yaml(yaml_string, **kwargs)
  return tnt.Model(keras_model)
Example No. 17
def model_from_json(json_string, **kwargs):
  keras_model = tf.keras.models.model_from_json(json_string, **kwargs)
  return tnt.Model(keras_model)
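A round-trip sketch (assuming `keras_model` is any `tf.keras.Model`; `to_json` is standard Keras serialization):
json_config = keras_model.to_json()                    # architecture only, no weights
tnt_model = tnt.models.model_from_json(json_config)    # rebuilt and wrapped as a tnt.Model
assert isinstance(tnt_model, tnt.Model)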
Example No. 18
def load_model(filepath, **kwargs):
  keras_model = tf.keras.models.load_model(filepath, **kwargs)
  # FIXME: compile tnt.Model before returning
  return tnt.Model(keras_model)
Example No. 19
def main(_):
    flags_obj = flags.FLAGS

    # get rank and comm_size
    rank = tnt.get_rank()
    comm_size = tnt.get_size()

    # compute micro batch if the dataset is not automatically distributed by Tarantella
    if not flags_obj.auto_distributed:
        batch_size = flags_obj.batch_size // comm_size
    else:
        batch_size = flags_obj.batch_size

    # Load and preprocess datasets
    (train_dataset, validation_dataset,
     _) = dataset_utils.get_tnt_cifar10_dataset(45000, 5000, 10000, batch_size)

    # Create model and wrap it into a Tarantella model
    model = resnet_model.resnet32(num_classes=10)
    model = tnt.Model(model)

    optimizer = get_optimizer(flags_obj.batch_size)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=(['sparse_categorical_accuracy']))
    model.summary()

    callbacks = []
    if flags_obj.enable_tensorboard:
        callbacks.append(
            tf.keras.callbacks.TensorBoard(log_dir=flags_obj.model_dir,
                                           profile_batch=2))
    if flags_obj.profile_runtime:
        callbacks.append(
            RuntimeProfiler(batch_size=batch_size,
                            logging_freq=flags_obj.logging_freq,
                            print_freq=flags_obj.print_freq))

    if flags_obj.enable_checkpoint_and_export:
        if flags_obj.model_dir is not None:
            ckpt_full_path = os.path.join(flags_obj.model_dir,
                                          'model.ckpt-{epoch:04d}')
            callbacks.append(
                tf.keras.callbacks.ModelCheckpoint(ckpt_full_path,
                                                   save_weights_only=True))

    logging.info("Start training")
    kwargs = {
        'tnt_distribute_dataset': flags_obj.auto_distributed,
        'tnt_distribute_validation_dataset': flags_obj.auto_distributed
    }
    history = model.fit(train_dataset,
                        epochs=flags_obj.train_epochs,
                        callbacks=callbacks,
                        validation_data=validation_dataset,
                        validation_freq=flags_obj.epochs_between_evals,
                        verbose=flags_obj.verbose,
                        **kwargs)
    logging.info("Train history: {}".format(history.history))

    kwargs = {'tnt_distribute_dataset': flags_obj.auto_distributed}
    eval_output = model.evaluate(validation_dataset,
                                 verbose=flags_obj.verbose,
                                 **kwargs)
Example No. 20
 def test_non_trainable_weights(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.non_trainable_weights == []
Example No. 21
def generate_tnt_model_runner(model):
    model_data_par = tnt.Model(model)
    runner = TrainingRunner(model_data_par)
    return runner
Example No. 22
 def test_stateful(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.stateful == False
Example No. 23
 def test_weights(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert len(tnt_model.weights) == 8  # 2 convs, 2 dense + biases
Example No. 24
 def test_state_updates(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.state_updates == []
Example No. 25
 def test_losses(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.losses == []
     tnt_model.add_loss(tf.abs(tnt_model.output))
     assert len(tnt_model.losses) == 1
Example No. 26
 def test_output(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.output.shape[0] == None
     assert tnt_model.output.shape[1] == 10
Example No. 27
def lenet5_model_generator():
    inputs = keras.Input(shape=(28, 28, 1,), name='input')
    x = keras.layers.Conv2D(20, 5, padding="same", activation='relu')(inputs)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
    x = keras.layers.Conv2D(50, 5, padding="same", activation='relu')(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x)
    x = keras.layers.Flatten()(x)
    x = keras.layers.Dense(500, activation='relu')(x)
    outputs = keras.layers.Dense(10, activation='softmax')(x)
    return keras.Model(inputs=inputs, outputs=outputs)


args = parse_args()

# Create Tarantella model
model = tnt.Model(lenet5_model_generator())

# Compile Tarantella model (as with Keras)
model.compile(optimizer=keras.optimizers.SGD(learning_rate=args.learning_rate),
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=[keras.metrics.SparseCategoricalAccuracy()])

# Load MNIST dataset (as with Keras)
shuffle_seed = 42
(x_train, y_train), (x_val, y_val), (x_test, y_test) = \
      mnist_as_np_arrays(args.train_size, args.val_size, args.test_size)

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(len(x_train), shuffle_seed)
train_dataset = train_dataset.batch(args.batch_size)
train_dataset = train_dataset.prefetch(
Example No. 28
def get_tnt_model_compiled(model, parallel_strategy, optimizer):
    tnt_model = tnt.Model(model, parallel_strategy)
    tnt_model.compile(optimizer=optimizer, **get_compile_params())
    return tnt_model
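A hedged call sketch (the arguments mirror fixtures from the other examples; `get_compile_params` is assumed to supply the loss and metrics):
compiled_model = get_tnt_model_compiled(mnist.lenet5_model_generator(),
                                        tnt.ParallelStrategy.DATA,
                                        tf.keras.optimizers.Adam())
assert isinstance(compiled_model.dist_optimizer,
                  tnt.distributed_optimizers.SynchDistributedOptimizer)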
Example No. 29
 def test_run_eagerly(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.run_eagerly == False
Example No. 30
 def test_metrics_names(self):
     tnt_model = tnt.Model(mnist.lenet5_model_generator())
     assert tnt_model.metrics_names == []