def create_model(params, is_train):
    """Build the Keras model that wraps `Transformer` for training or inference.

    Args:
      params: Mapping of hyperparameters. It is passed to `Transformer`; on the
        training path the keys "vocab_size", "label_smoothing" and
        "enable_metrics_in_training" are also read here.
      is_train: Selects the training graph when truthy and the inference graph
        otherwise. Also forwarded to the transformer as `training`.

    Returns:
      A `tf.keras.Model`. When training it maps `[inputs, targets]` to logits
      and has the transformer loss attached through `add_loss`. Otherwise it
      maps `inputs` to `[outputs, scores]`.
    """
    with tf.name_scope("model"):
        if not is_train:
            # Inference graph: only source ids go in; the transformer returns
            # a dict from which "outputs" and "scores" are exposed.
            source_ids = tf.keras.layers.Input(
                shape=(None,), dtype="int64", name="inputs")
            transformer = Transformer(params, name="transformer_v2")
            result = transformer([source_ids], training=is_train)
            return tf.keras.Model(
                inputs=source_ids,
                outputs=[result["outputs"], result["scores"]])

        # Training graph: source and target ids are both model inputs.
        source_ids = tf.keras.layers.Input(
            shape=(None,), dtype="int64", name="inputs")
        target_ids = tf.keras.layers.Input(
            shape=(None,), dtype="int64", name="targets")
        transformer = Transformer(params, name="transformer_v2")
        logits = transformer([source_ids, target_ids], training=is_train)

        vocab_size = params["vocab_size"]
        label_smoothing = params["label_smoothing"]

        # Metrics are opt-in during training.
        if params["enable_metrics_in_training"]:
            metric_layer = metrics.MetricLayer(vocab_size)
            logits = metric_layer([logits, target_ids])

        # Identity layer that names the output "logits" and is declared with
        # dtype float32.
        identity = tf.keras.layers.Lambda(
            lambda x: x, name="logits", dtype=tf.float32)
        logits = identity(logits)

        train_model = tf.keras.Model(
            inputs=[source_ids, target_ids], outputs=logits)
        # TODO(reedwm): Can we do this loss in float16 instead of float32?
        train_model.add_loss(
            metrics.transformer_loss(
                logits, target_ids, label_smoothing, vocab_size))
        return train_model
def create_model(params, is_train):
    """Build the Keras model that wraps `Transformer` for training or inference.

    Args:
      params: Mapping of hyperparameters. It is passed to `Transformer`; on the
        training path the keys "vocab_size" and "label_smoothing" are also
        read here.
      is_train: Selects the training graph when truthy and the inference graph
        otherwise. Also forwarded to the transformer as `training`.

    Returns:
      A `tf.keras.Model`. When training it maps `[inputs, targets]` to the
      logits after they have gone through `MetricLayer`, `LossLayer` and an
      identity layer named "logits". Otherwise it maps `inputs` to
      `[outputs, scores]`.
    """
    with tf.name_scope("model"):
        if not is_train:
            # Inference graph: only source ids go in; the transformer returns
            # a dict from which "outputs" and "scores" are exposed.
            source_ids = tf.keras.layers.Input(
                shape=(None,), dtype="int64", name="inputs")
            transformer = Transformer(params, name="transformer_v2")
            result = transformer([source_ids], training=is_train)
            return tf.keras.Model(
                inputs=source_ids,
                outputs=[result["outputs"], result["scores"]])

        # Training graph: source and target ids are both model inputs.
        source_ids = tf.keras.layers.Input(
            shape=(None,), dtype="int64", name="inputs")
        target_ids = tf.keras.layers.Input(
            shape=(None,), dtype="int64", name="targets")
        transformer = Transformer(params, name="transformer_v2")
        logits = transformer([source_ids, target_ids], training=is_train)

        vocab_size = params["vocab_size"]
        label_smoothing = params["label_smoothing"]

        # Metric and loss layers are chained on the logits, in that order.
        metric_layer = metrics.MetricLayer(vocab_size)
        logits = metric_layer([logits, target_ids])
        loss_layer = metrics.LossLayer(vocab_size, label_smoothing)
        logits = loss_layer([logits, target_ids])

        # Identity layer whose only visible role is to name the output
        # "logits".
        identity = tf.keras.layers.Lambda(lambda x: x, name="logits")
        logits = identity(logits)

        return tf.keras.Model(
            inputs=[source_ids, target_ids], outputs=logits)
def test_metric_layer(self):
    """Check that MetricLayer's output has shape [None, None, vocab_size]."""
    vocab_size = 50
    logits_in = tf.keras.layers.Input(
        shape=(None, vocab_size), dtype="float32", name="logits")
    targets_in = tf.keras.layers.Input(
        shape=(None,), dtype="int64", name="targets")

    metric_layer = metrics.MetricLayer(vocab_size)
    logits_out = metric_layer([logits_in, targets_in])

    # The two leading dimensions of both symbolic inputs are unspecified.
    expected_shape = [None, None, vocab_size]
    self.assertEqual(logits_out.shape.as_list(), expected_shape)