def create_model(params, is_train):
    """Build a Keras model wrapping the Transformer.

    In training mode the model takes ``(inputs, targets)``, outputs
    float32 logits, and attaches ``metrics.transformer_loss`` via
    ``add_loss``. In inference mode it takes only ``inputs`` and
    returns the decoded ``outputs`` and their ``scores``.
    """
    with tf.name_scope('model'):
        if not is_train:
            # Inference path: decode from source tokens alone.
            inputs = layers.Input((None, ), dtype=tf.int64, name='inputs')
            transformer = Transformer(params, name='transformer')
            decoded = transformer([inputs], training=is_train)
            return Model(inputs, [decoded['outputs'], decoded['scores']])

        # Training path: teacher-forced inputs + targets.
        inputs = layers.Input((None, ), dtype=tf.int64, name='inputs')
        targets = layers.Input((None, ), dtype=tf.int64, name='targets')
        transformer = Transformer(params, name='transformer')
        logits = transformer([inputs, targets], training=is_train)

        vocab_size = params['vocab_size']
        label_smoothing = params['label_smoothing']
        if params['enable_metrics_in_training']:
            # MetricLayer passes logits through while registering
            # training metrics (semantics defined elsewhere — see metrics module).
            logits = metrics.MetricLayer(vocab_size)([logits, targets])
        # Identity Lambda fixes the output name and forces float32 dtype.
        logits = layers.Lambda(
            lambda x: x, name='logits', dtype=tf.float32)(logits)

        model = Model([inputs, targets], logits)
        # TODO: Can we do this loss in float16 instead of float32?
        loss = metrics.transformer_loss(
            logits, targets, label_smoothing, vocab_size)
        model.add_loss(loss)
        return model
def build(self):
    """Construct, train, and store the VAE; derive the anomaly threshold.

    Builds an MLP encoder/decoder pair, attaches the combined
    reconstruction (MSE) + KL loss via ``add_loss``, fits on a
    train/validation split of ``self.x_train`` (autoencoder style:
    targets == inputs), then records the 90th percentile of the
    per-sample training reconstruction MSE as ``self.threshold``
    and the fitted model as ``self.vae``.
    """
    # --- Encoder: input -> hidden -> (z_mean, z_log_var) -> sampled z ---
    inputs = Input(shape=self.input_shape, name='encoder_input')
    hidden = Dense(self.intermediate_dim, activation=self.activation_fct)(inputs)
    z_mean = Dense(self.latent_dim, name='z_mean')(hidden)
    z_log_var = Dense(self.latent_dim, name='z_log_var')(hidden)
    # Reparameterization trick: push the sampling out as an input.
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(self.latent_dim, ), name='z')(
        [z_mean, z_log_var])
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # --- Decoder: latent sample -> hidden -> reconstruction ---
    latent_inputs = Input(shape=(self.latent_dim, ), name='z_sampling')
    dec_hidden = Dense(
        self.intermediate_dim, activation=self.activation_fct)(latent_inputs)
    reconstructed = Dense(self.original_dim, activation='sigmoid')(dec_hidden)
    decoder = Model(latent_inputs, reconstructed, name='decoder')

    # --- End-to-end VAE: decode the sampled z (index 2 of encoder outputs) ---
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')

    # VAE loss = scaled reconstruction MSE + KL divergence term.
    reconstruction_loss = mse(inputs, outputs) * self.original_dim
    kl_loss = -0.5 * K.sum(
        1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae.add_loss(K.mean(reconstruction_loss + kl_loss))
    vae.compile(optimizer=self.optimizer, loss=self.loss,
                metrics=['accuracy'])

    # Hold out a validation slice of the training data.
    x_train_split, x_valid_split = train_test_split(
        self.x_train,
        test_size=self.train_test_split,
        random_state=self.seed)
    vae.fit(x_train_split, x_train_split,
            batch_size=self.batch_size,
            epochs=self.epochs,
            verbose=self.verbosity,
            shuffle=True,
            validation_data=(x_valid_split, x_valid_split))

    # Anomaly threshold: 90th percentile of training reconstruction error.
    x_train_pred = vae.predict(self.x_train)
    train_mse = np.mean(np.power(self.x_train - x_train_pred, 2), axis=1)
    self.threshold = np.quantile(train_mse, 0.9)
    self.vae = vae