def main(argv):
  """Trains a deep ensemble of MLP regressors and reports ensemble metrics.

  Each ensemble member is trained independently, optionally with adversarial
  training (when FLAGS.epsilon is set) or on a bootstrap resample of the
  training set (FLAGS.bootstrap). Member weights are written to
  FLAGS.output_dir and then evaluated jointly via utils.ensemble_metrics.
  """
  del argv  # unused arg
  if not FLAGS.use_gpu:
    raise ValueError('Only GPU is currently supported.')
  if FLAGS.num_cores > 1:
    raise ValueError('Only a single accelerator is currently supported.')
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)

  x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
  n_train = x_train.shape[0]

  ensemble_filenames = []
  for i in range(FLAGS.ensemble_size):
    # Output scale is seeded with the empirical std of the targets so the
    # predictive distribution starts at a sensible width.
    model = multilayer_perceptron(
        x_train.shape[1:],
        np.std(y_train, axis=0) + tf.keras.backend.epsilon())
    if FLAGS.epsilon:
      # Adversarial training uses the custom loop below, so we only need a
      # loss function and an optimizer here (no compile).
      loss_fn = make_adversarial_loss_fn(model)
      optimizer = tf.keras.optimizers.Adam(lr=FLAGS.learning_rate)
    else:
      def negative_log_likelihood(y_true, y_pred):
        return -y_pred.distribution.log_prob(y_true)

      model.compile(
          optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
          loss=negative_log_likelihood)

    member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir=member_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    if FLAGS.epsilon:
      for epoch in range((FLAGS.batch_size * FLAGS.training_steps) // n_train):
        logging.info('Epoch %s', epoch)
        # Fix: shuffle once per epoch and take contiguous batch-sized slices
        # so every example is visited exactly once per epoch. The previous
        # code drew a fresh permutation on every step and sliced
        # perm[j:j + batch_size], which produced overlapping, effectively
        # random batches and never covered the whole training set per epoch.
        perm = np.random.permutation(n_train)
        for j in range(n_train // FLAGS.batch_size):
          batch = perm[j * FLAGS.batch_size:(j + 1) * FLAGS.batch_size]
          with tf.GradientTape() as tape:
            loss = loss_fn(x_train[batch], y_train[batch])
          grads = tape.gradient(loss, model.trainable_weights)
          optimizer.apply_gradients(zip(grads, model.trainable_weights))
    else:
      if FLAGS.bootstrap:
        # Resample n_train points with replacement for this member.
        inds = np.random.choice(n_train, n_train, replace=True)
        x_sampled = x_train[inds]
        y_sampled = y_train[inds]

      model.fit(
          x=x_train if not FLAGS.bootstrap else x_sampled,
          y=y_train if not FLAGS.bootstrap else y_sampled,
          batch_size=FLAGS.batch_size,
          epochs=(FLAGS.batch_size * FLAGS.training_steps) // n_train,
          validation_data=(x_test, y_test),
          validation_freq=max(
              (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
          verbose=0,
          callbacks=[tensorboard])

    member_filename = os.path.join(member_dir, 'model.weights')
    ensemble_filenames.append(member_filename)
    model.save_weights(member_filename)

  # TODO(trandustin): Move this into utils.ensemble_metrics. It's currently
  # separate so that VI can use utils.ensemble_metrics while in TF1.
  def ll(arg):
    """Returns per-example [log-likelihood, predictive-mean error]."""
    features, labels = arg
    predictions = model(features)
    log_prob = predictions.distribution.log_prob(labels)
    error = predictions.distribution.loc - labels
    return [log_prob, error]

  ensemble_metrics_vals = {
      'train': utils.ensemble_metrics(
          x_train, y_train, model, ll, weight_files=ensemble_filenames),
      'test': utils.ensemble_metrics(
          x_test, y_test, model, ll, weight_files=ensemble_filenames),
  }

  for split, metrics in ensemble_metrics_vals.items():
    logging.info(split)
    for metric_name in metrics:
      logging.info('%s: %s', metric_name, metrics[metric_name])
def main(argv):
  """Trains an ensemble of LeNet-5 classifiers in TF1 graph mode and reports
  ensemble metrics on the train and test splits."""
  del argv  # unused arg
  if not FLAGS.use_gpu:
    raise ValueError('Only GPU is currently supported.')
  if FLAGS.num_cores > 1:
    raise ValueError('Only a single accelerator is currently supported.')
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)

  tf1.disable_v2_behavior()
  session = tf1.Session()
  x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset, session)
  n_train = x_train.shape[0]
  # Infers the number of classes from the labels; assumes integer class ids
  # starting at 0 — TODO confirm against utils.load.
  num_classes = int(np.amax(y_train)) + 1

  ensemble_filenames = []
  for i in range(FLAGS.ensemble_size):
    # TODO(trandustin): We re-build the graph for each ensemble member. This
    # is due to an unknown bug where the variables are otherwise not
    # re-initialized to be random. While this is inefficient in graph mode, I'm
    # keeping this for now as we'd like to move to eager mode anyways.
    model = lenet5(x_train.shape[1:], num_classes)

    # The losses/metrics below deliberately close over `model`; each loop
    # iteration compiles them into that iteration's freshly built model
    # (hence the cell-var-from-loop pylint disables).
    def negative_log_likelihood(y, rv_y):
      del rv_y  # unused arg
      return -model.output.distribution.log_prob(tf.squeeze(y))  # pylint: disable=cell-var-from-loop

    def accuracy(y_true, y_sample):
      """Per-example indicator of whether the argmax class matches y_true."""
      del y_sample  # unused arg
      return tf.equal(
          tf.argmax(input=model.output.distribution.logits, axis=1),  # pylint: disable=cell-var-from-loop
          tf.cast(tf.squeeze(y_true), tf.int64))

    def log_likelihood(y_true, y_sample):
      del y_sample  # unused arg
      return model.output.distribution.log_prob(tf.squeeze(y_true))  # pylint: disable=cell-var-from-loop

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
        loss=negative_log_likelihood,
        metrics=[log_likelihood, accuracy])
    member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=member_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    if FLAGS.bootstrap:
      # Resample n_train points with replacement for this member.
      inds = np.random.choice(n_train, n_train, replace=True)
      x_sampled = x_train[inds]
      y_sampled = y_train[inds]

    model.fit(x=x_train if not FLAGS.bootstrap else x_sampled,
              y=y_train if not FLAGS.bootstrap else y_sampled,
              batch_size=FLAGS.batch_size,
              epochs=(FLAGS.batch_size * FLAGS.training_steps) // n_train,
              validation_data=(x_test, y_test),
              validation_freq=max(
                  (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
              verbose=1,
              callbacks=[tensorboard])

    member_filename = os.path.join(member_dir, 'model.weights')
    ensemble_filenames.append(member_filename)
    model.save_weights(member_filename)

  # Backend function mapping (features, labels) to per-example
  # [log-likelihood, logits]; utils.ensemble_metrics re-runs it after loading
  # each member's weight file.
  labels = tf.keras.layers.Input(shape=y_train.shape[1:])
  ll = tf.keras.backend.function([model.input, labels], [
      model.output.distribution.log_prob(tf.squeeze(labels)),
      model.output.distribution.logits,
  ])
  ensemble_metrics_vals = {
      'train': utils.ensemble_metrics(
          x_train, y_train, model, ll, weight_files=ensemble_filenames),
      'test': utils.ensemble_metrics(
          x_test, y_test, model, ll, weight_files=ensemble_filenames),
  }

  for split, metrics in ensemble_metrics_vals.items():
    logging.info(split)
    for metric_name in metrics:
      logging.info('%s: %s', metric_name, metrics[metric_name])
def main(argv):
  """Trains a VI model, then refines it via auxiliary-variable sampling.

  Pipeline: (1) fit the variational model; (2) train the base model further
  as an "overtrained" comparison point; (3) repeatedly sample auxiliary
  variables (shrinking the prior each round) and fine-tune, producing an
  ensemble of refined posteriors; (4) log ensemble metrics for all three
  stages.
  """
  del argv  # unused arg
  if not FLAGS.use_gpu:
    raise ValueError('Only GPU is currently supported.')
  if FLAGS.num_cores > 1:
    raise ValueError('Only a single accelerator is currently supported.')
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)

  tf1.disable_v2_behavior()
  session = tf1.Session()
  with session.as_default():
    x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
    n_train = x_train.shape[0]

    model = multilayer_perceptron(
        n_train,
        x_train.shape[1:],
        np.std(y_train) + tf.keras.backend.epsilon())
    # Per-layer accumulators for the KL costs introduced by auxiliary
    # sampling; they start at zero and are incremented via assign_add below.
    for l in model.layers:
      l.kl_cost_weight = l.add_weight(
          name='kl_cost_weight',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)
      # Fix: Layer.add_variable is a deprecated alias of add_weight; use
      # add_weight for consistency with kl_cost_weight above.
      l.kl_cost_bias = l.add_weight(
          name='kl_cost_bias',
          shape=(),
          initializer=tf.constant_initializer(0.),
          trainable=False)

    [negative_log_likelihood, mse, log_likelihood, kl,
     elbo] = get_losses_and_metrics(model, n_train)
    metrics = [elbo, log_likelihood, kl, mse]
    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=FLAGS.output_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    def fit_fn(model, steps, initial_epoch):
      """Fits `model` for `steps` gradient steps, resuming at initial_epoch."""
      return model.fit(
          x=x_train,
          y=y_train,
          batch_size=FLAGS.batch_size,
          epochs=initial_epoch + (FLAGS.batch_size * steps) // n_train,
          initial_epoch=initial_epoch,
          validation_data=(x_test, y_test),
          validation_freq=max(
              (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
          verbose=1,
          callbacks=[tensorboard])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr=float(FLAGS.learning_rate)),
        loss=negative_log_likelihood,
        metrics=metrics)
    # Fix: tf1.initialize_all_variables is deprecated; use the equivalent
    # tf1.global_variables_initializer.
    session.run(tf1.global_variables_initializer())
    train_epochs = (FLAGS.training_steps * FLAGS.batch_size) // n_train
    fit_fn(model, FLAGS.training_steps, initial_epoch=0)

    # Backend function mapping (features, labels) to per-example
    # [log-likelihood, predictive-mean error].
    labels = tf.keras.layers.Input(shape=y_train.shape[1:])
    ll = tf.keras.backend.function([model.input, labels], [
        model.output.distribution.log_prob(labels),
        model.output.distribution.loc - labels
    ])

    base_metrics = [
        utils.ensemble_metrics(x_train, y_train, model, ll),
        utils.ensemble_metrics(x_test, y_test, model, ll),
    ]
    model_dir = os.path.join(FLAGS.output_dir, 'models')
    tf.io.gfile.makedirs(model_dir)
    base_model_filename = os.path.join(model_dir, 'base_model.weights')
    model.save_weights(base_model_filename)

    # Train base model further for comparison.
    fit_fn(
        model,
        FLAGS.n_auxiliary_variables * FLAGS.auxiliary_sampling_frequency *
        FLAGS.ensemble_size,
        initial_epoch=train_epochs)

    overtrained_metrics = [
        utils.ensemble_metrics(x_train, y_train, model, ll),
        utils.ensemble_metrics(x_test, y_test, model, ll),
    ]

    # Perform refined VI.
    sample_op = []
    for l in model.layers:
      if hasattr(l, 'kernel_prior'):
        weight_op, weight_cost = sample_auxiliary_op(
            l.kernel_prior.distribution, l.kernel_posterior.distribution,
            FLAGS.auxiliary_variance_ratio)
        sample_op.append(weight_op)
        sample_op.append(l.kl_cost_weight.assign_add(weight_cost))
        # Fix the variance of the prior
        session.run(l.kernel_prior.distribution.istrainable.assign(0.))
        if hasattr(l.bias_prior, 'distribution'):
          bias_op, bias_cost = sample_auxiliary_op(
              l.bias_prior.distribution, l.bias_posterior.distribution,
              FLAGS.auxiliary_variance_ratio)
          sample_op.append(bias_op)
          sample_op.append(l.kl_cost_bias.assign_add(bias_cost))
          # Fix the variance of the prior
          session.run(l.bias_prior.distribution.istrainable.assign(0.))

    ensemble_filenames = []
    for i in range(FLAGS.ensemble_size):
      # Each ensemble component restarts from the base model and performs
      # n_auxiliary_variables rounds of sample-then-fine-tune.
      model.load_weights(base_model_filename)
      for j in range(FLAGS.n_auxiliary_variables):
        session.run(sample_op)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                # The learning rate is proportional to the scale of the prior.
                lr=float(FLAGS.learning_rate_for_sampling *
                         np.sqrt(1. - FLAGS.auxiliary_variance_ratio)**j)),
            loss=negative_log_likelihood,
            metrics=metrics)
        fit_fn(
            model,
            FLAGS.auxiliary_sampling_frequency,
            initial_epoch=train_epochs)
      ensemble_filename = os.path.join(
          model_dir, 'ensemble_component_' + str(i) + '.weights')
      ensemble_filenames.append(ensemble_filename)
      model.save_weights(ensemble_filename)

    auxiliary_metrics = [
        utils.ensemble_metrics(
            x_train, y_train, model, ll, weight_files=ensemble_filenames),
        utils.ensemble_metrics(
            x_test, y_test, model, ll, weight_files=ensemble_filenames),
    ]

    for metrics, name in [(base_metrics, 'Base model'),
                          (overtrained_metrics, 'Overtrained model'),
                          (auxiliary_metrics, 'Auxiliary sampling')]:
      logging.info(name)
      for metrics_dict, split in [(metrics[0], 'train'),
                                  (metrics[1], 'test')]:
        logging.info(split)
        for metric_name in metrics_dict:
          logging.info('%s: %s', metric_name, metrics_dict[metric_name])
def main(argv):
  """Trains an MLP ensemble in TF1 graph mode and reports ensemble metrics.

  When FLAGS.epsilon is set, each member is trained with fast gradient-sign
  adversarial training via an explicit session loop; otherwise members are
  trained with Keras fit, optionally on a bootstrap resample
  (FLAGS.bootstrap).
  """
  del argv  # unused arg
  np.random.seed(FLAGS.seed)
  tf.random.set_seed(FLAGS.seed)
  tf.io.gfile.makedirs(FLAGS.output_dir)
  tf1.disable_v2_behavior()

  x_train, y_train, x_test, y_test = utils.load(FLAGS.dataset)
  n_train = x_train.shape[0]
  session = tf1.Session()

  ensemble_filenames = []
  for i in range(FLAGS.ensemble_size):
    # TODO(trandustin): We re-build the graph for each ensemble member. This
    # is due to an unknown bug where the variables are otherwise not
    # re-initialized to be random. While this is inefficient in graph mode, I'm
    # keeping this for now as we'd like to move to eager mode anyways.
    model = multilayer_perceptron(
        x_train.shape[1:],
        np.std(y_train, axis=0) + tf.keras.backend.epsilon())

    # The losses/metrics below deliberately close over `model`; each loop
    # iteration binds them to that iteration's freshly built model.
    def negative_log_likelihood(y, rv_y):
      del rv_y  # unused arg
      return -model.output.distribution.log_prob(y)  # pylint: disable=cell-var-from-loop

    def mse(y_true, y_sample):
      """Squared error of the predictive mean."""
      del y_sample  # unused arg
      return tf.math.square(model.output.distribution.loc - y_true)  # pylint: disable=cell-var-from-loop

    def log_likelihood(y_true, y_sample):
      del y_sample  # unused arg
      return model.output.distribution.log_prob(y_true)  # pylint: disable=cell-var-from-loop

    if FLAGS.epsilon:
      # Build the FGSM adversarial-training graph: average the NLL on clean
      # inputs with the NLL on inputs perturbed by epsilon * sign(grad).
      y_true = tf.keras.Input(shape=y_train.shape[1:], name='labels')
      loss = tf.reduce_mean(-model.output.distribution.log_prob(y_true))
      nn_input_tensor = model.input
      grad = tf1.gradients(loss, nn_input_tensor)[0]
      # It is assumed that the training data is normalized.
      adv_inputs_tensor = nn_input_tensor + FLAGS.epsilon * tf.math.sign(
          tf1.stop_gradient(grad))
      adv_inputs = tf.keras.Input(tensor=adv_inputs_tensor, name='adv_inputs')
      adv_out_dist = model(adv_inputs)
      adv_loss = tf.reduce_mean(-adv_out_dist.distribution.log_prob(y_true))
      optimizer = tf1.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
      train_op = optimizer.minimize(0.5 * loss + 0.5 * adv_loss)
    else:
      model.compile(
          optimizer=tf.keras.optimizers.Adam(lr=FLAGS.learning_rate),
          loss=negative_log_likelihood,
          metrics=[log_likelihood, mse])
    member_dir = os.path.join(FLAGS.output_dir, 'member_' + str(i))
    tensorboard = tf1.keras.callbacks.TensorBoard(
        log_dir=member_dir,
        update_freq=FLAGS.batch_size * FLAGS.validation_freq)

    if FLAGS.epsilon:
      # Fix: tf1.initialize_all_variables is deprecated; use the equivalent
      # tf1.global_variables_initializer.
      session.run(tf1.global_variables_initializer())
      for epoch in range((FLAGS.batch_size * FLAGS.training_steps) // n_train):
        logging.info('Epoch %s', epoch)
        # Fix: shuffle once per epoch and slice contiguous batches so every
        # example is visited exactly once per epoch. Previously a fresh
        # permutation was drawn every step and sliced [j:j + batch_size],
        # yielding overlapping, effectively random batches.
        perm = np.random.permutation(n_train)
        for j in range(n_train // FLAGS.batch_size):
          batch = perm[j * FLAGS.batch_size:(j + 1) * FLAGS.batch_size]
          session.run(
              train_op,
              feed_dict={
                  nn_input_tensor: x_train[batch],
                  y_true: y_train[batch],
              })
    else:
      if FLAGS.bootstrap:
        # Resample n_train points with replacement for this member.
        inds = np.random.choice(n_train, n_train, replace=True)
        x_sampled = x_train[inds]
        y_sampled = y_train[inds]
      model.fit(
          x=x_train if not FLAGS.bootstrap else x_sampled,
          y=y_train if not FLAGS.bootstrap else y_sampled,
          batch_size=FLAGS.batch_size,
          epochs=(FLAGS.batch_size * FLAGS.training_steps) // n_train,
          validation_data=(x_test, y_test),
          validation_freq=max(
              (FLAGS.validation_freq * FLAGS.batch_size) // n_train, 1),
          verbose=0,
          callbacks=[tensorboard])

    member_filename = os.path.join(member_dir, 'model.weights')
    ensemble_filenames.append(member_filename)
    model.save_weights(member_filename)

  # Backend function mapping (features, labels) to per-example
  # [log-likelihood, predictive-mean error].
  labels = tf.keras.layers.Input(shape=y_train.shape[1:])
  ll = tf.keras.backend.function(
      [model.input, labels],
      [model.output.distribution.log_prob(labels),
       model.output.distribution.loc - labels])
  ensemble_metrics_vals = {
      'train': utils.ensemble_metrics(
          x_train, y_train, model, ll, weight_files=ensemble_filenames),
      'test': utils.ensemble_metrics(
          x_test, y_test, model, ll, weight_files=ensemble_filenames),
  }

  for split, metrics in ensemble_metrics_vals.items():
    logging.info(split)
    for metric_name in metrics:
      logging.info('%s: %s', metric_name, metrics[metric_name])