Пример #1
0
def create_model(params, is_train):
    """Assemble the Keras ``Model`` that wraps the Transformer.

    When ``is_train`` is truthy the model takes ``[inputs, targets]``,
    outputs the logits (routed through an identity ``Lambda`` layer that is
    declared with dtype float32) and has the transformer loss attached with
    ``add_loss``. Otherwise the model takes only ``inputs`` and outputs
    ``[outputs, scores]``, read from the dict the Transformer returns.

    Args:
        params: Mapping of hyperparameters. It is passed to ``Transformer``;
            the training path additionally reads ``'vocab_size'``,
            ``'label_smoothing'`` and ``'enable_metrics_in_training'``.
        is_train: Selects which graph is built and is forwarded unchanged as
            the ``training`` argument of the Transformer call.

    Returns:
        The constructed ``Model``.
    """
    with tf.name_scope('model'):
        # Both variants start from the same variable-length int64 input.
        source_ids = layers.Input((None, ), dtype=tf.int64, name='inputs')

        if not is_train:
            # Inference graph: only the source sequence is fed in.
            transformer = Transformer(params, name='transformer')
            decoded = transformer([source_ids], training=is_train)
            return Model(source_ids, [decoded['outputs'], decoded['scores']])

        # Training graph: the target sequence is a second model input.
        target_ids = layers.Input((None, ), dtype=tf.int64, name='targets')
        transformer = Transformer(params, name='transformer')
        logits = transformer([source_ids, target_ids], training=is_train)

        vocab_size = params['vocab_size']
        label_smoothing = params['label_smoothing']
        if params['enable_metrics_in_training']:
            metric_layer = metrics.MetricLayer(vocab_size)
            logits = metric_layer([logits, target_ids])

        # Identity layer: gives the output tensor the name 'logits' and a
        # float32 layer dtype.
        as_float32 = layers.Lambda(lambda x: x,
                                   name='logits',
                                   dtype=tf.float32)
        logits = as_float32(logits)

        train_model = Model([source_ids, target_ids], logits)
        # TODO: Can we do this loss in float16 instead of float32?
        train_model.add_loss(
            metrics.transformer_loss(logits, target_ids, label_smoothing,
                                     vocab_size))
        return train_model
Пример #2
0
    def build(self):
        """Construct, compile and fit the VAE, then store it with a threshold.

        The method wires a dense encoder (producing ``z_mean``, ``z_log_var``
        and the sampled ``z``) to a dense decoder, attaches the combined
        reconstruction + KL loss with ``add_loss``, compiles the model, and
        fits it on a train/validation split of ``self.x_train``.

        Reads from ``self``: ``input_shape``, ``intermediate_dim``,
        ``activation_fct``, ``latent_dim``, ``original_dim``, ``optimizer``,
        ``loss``, ``x_train``, ``train_test_split``, ``seed``,
        ``batch_size``, ``epochs`` and ``verbosity``.

        Writes to ``self``:
            threshold: 0.9 quantile of the per-row mean squared
                reconstruction error over the whole of ``self.x_train``.
            vae: the fitted model.
        """
        # --- Encoder -------------------------------------------------------
        encoder_in = Input(shape=self.input_shape, name='encoder_input')
        hidden = Dense(self.intermediate_dim,
                       activation=self.activation_fct)(encoder_in)
        z_mean = Dense(self.latent_dim, name='z_mean')(hidden)
        z_log_var = Dense(self.latent_dim, name='z_log_var')(hidden)

        # Reparameterization trick: sampling happens inside a Lambda layer.
        # "output_shape" isn't necessary with the TensorFlow backend.
        sampling_layer = Lambda(sampling,
                                output_shape=(self.latent_dim, ),
                                name='z')
        z = sampling_layer([z_mean, z_log_var])
        encoder = Model(encoder_in, [z_mean, z_log_var, z], name='encoder')

        # --- Decoder -------------------------------------------------------
        decoder_in = Input(shape=(self.latent_dim, ), name='z_sampling')
        hidden = Dense(self.intermediate_dim,
                       activation=self.activation_fct)(decoder_in)
        decoded = Dense(self.original_dim, activation='sigmoid')(hidden)
        decoder = Model(decoder_in, decoded, name='decoder')

        # --- End-to-end VAE ------------------------------------------------
        # Index 2 of the encoder outputs is the sampled latent vector z.
        reconstruction = decoder(encoder(encoder_in)[2])
        vae = Model(encoder_in, reconstruction, name='vae_mlp')

        # VAE loss = reconstruction (MSE scaled by original_dim) + KL term.
        reconstruction_loss = (mse(encoder_in, reconstruction)
                               * self.original_dim)
        kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_terms, axis=-1) * -0.5
        vae.add_loss(K.mean(reconstruction_loss + kl_loss))

        vae.compile(optimizer=self.optimizer,
                    loss=self.loss,
                    metrics=['accuracy'])

        # --- Training ------------------------------------------------------
        fit_data, valid_data = train_test_split(
            self.x_train,
            test_size=self.train_test_split,
            random_state=self.seed)

        # Autoencoder setup: each split is passed as both input and target.
        vae.fit(fit_data,
                fit_data,
                batch_size=self.batch_size,
                epochs=self.epochs,
                verbose=self.verbosity,
                shuffle=True,
                validation_data=(valid_data, valid_data))

        # --- Threshold -----------------------------------------------------
        # Computed on the full training set, not only the fitting split.
        reconstructed = vae.predict(self.x_train)
        squared_error = np.power(self.x_train - reconstructed, 2)
        per_row_mse = np.mean(squared_error, axis=1)
        self.threshold = np.quantile(per_row_mse, 0.9)
        self.vae = vae