def create_layer(self):
    """Build this convolution layer's output tensor and return it.

    Creates the bias, leaky-ReLU alpha and filter variables, convolves
    ``self.inputT`` with the filter, adds the bias and applies the
    activation selected by ``self.activation``. The activated tensor is
    stored in ``self.outputT``; for activations that take no alpha,
    ``self.alpha_const`` is overwritten with ``"NA"``.

    Returns:
        ``self.outputT`` when ``self.dropout`` equals ``CM.OFF``; otherwise
        the result of ``tf.nn.dropout(self.outputT, self.dropout)``. In both
        cases ``self.outputT`` itself holds the tensor *without* dropout.

    An unrecognised activation is logged and the process exits with
    status 1.
    """
    bias = init_bias([self.layerSize], self.bias_const)
    leak = init_alpha([self.layerSize], self.alpha_const)
    # The input's channel count is read from axis 3 of its static shape.
    in_channels = int(self.inputT.get_shape()[3])
    kernel = init_weight(
        [self.kernelSize, self.kernelSize, in_channels, self.layerSize],
        self.weight_sdev, self.weight_mean)

    def pre_activation():
        # Convolution plus bias, shared by every activation branch.
        return conv2d(self.inputT, kernel, self.stride) + bias

    if self.activation == CM.LEAKY_RELU:
        self.outputT = tf.nn.leaky_relu(pre_activation(), leak)
    elif self.activation == CM.RELU:
        self.outputT = tf.nn.relu(pre_activation())
        self.alpha_const = "NA"
    elif self.activation == CM.NO_ACTIVATION:
        self.outputT = pre_activation()
        self.alpha_const = "NA"
    else:
        print_and_log("ERROR: unknown activation {}", self.activation)
        exit(1)

    if self.dropout == CM.OFF:
        return self.outputT
    return tf.nn.dropout(self.outputT, self.dropout)
def set_up_images(self):
    """Prepare image arrays and one-hot labels for the train and test splits.

    Populates ``training_images``/``training_len``/``training_labels`` from
    ``self.all_train_batches`` and ``test_images``/``test_len``/
    ``test_labels`` from ``self.test_batch``.
    """

    def as_image_array(batches):
        # Vertically stack every batch's b"data" rows into one array.
        stacked = np.vstack([batch[b"data"] for batch in batches])
        count = len(stacked)
        # Reshape to [images, channels, W, H], move channels last so the
        # result is [images, W, H, channels], then divide by the colour
        # depth to normalise the colour space.
        shaped = stacked.reshape(count, self.cs_rgb, self.image_width,
                                 self.image_height)
        return shaped.transpose(0, 2, 3, 1) / self.color_deapth, count

    def as_one_hot(batches):
        # Concatenate the per-batch label lists and one-hot encode them
        # (e.g. [0,0,0,1,0,0,0,0,0,0]).
        flat_labels = np.hstack([batch[b"labels"] for batch in batches])
        return one_hot_encode(flat_labels, num_labels)

    print_and_log("Setting Up Training Images and Labels")
    self.training_images, self.training_len = as_image_array(
        self.all_train_batches)
    self.training_labels = as_one_hot(self.all_train_batches)

    print_and_log("Setting Up Test Images and Labels")
    self.test_images, self.test_len = as_image_array(self.test_batch)
    self.test_labels = as_one_hot(self.test_batch)
def test_model(model_path, load=True):
    """Evaluate the model on ``test_iterations`` test batches and log the result.

    Args:
        model_path: Checkpoint path; restored into the session when ``load``
            is true and always echoed in the log line.
        load: Restore the checkpoint first. Pass ``False`` when the session
            already holds the weights to evaluate.
    """
    if load:
        saver.restore(sess, save_path=model_path)

    per_batch_accuracy = []
    for _ in range(test_iterations):
        support_x, query_x, support_y, query_y = data.get_batch(
            'test', test_args_per_batch, args.test_shot, args.test_way)
        feeds = {
            train_images: support_x,
            test_images: query_x,
            train_labels: support_y,
            test_labels: query_y,
            dropout_keep_prob: 1.0,  # keep every unit at test time
        }
        per_batch_accuracy.append(sess.run(accuracy, feeds))

    scores = np.array(per_batch_accuracy)
    test_accuracy = scores.mean() * 100.0
    # 196.0 = 1.96 (95% normal quantile) * 100 (fraction -> percent).
    confidence_interval_95 = (196.0 * scores.std()) / np.sqrt(
        len(per_batch_accuracy))
    print_and_log(
        logfile,
        'Held out accuracy: {0:5.3f} +/- {1:5.3f} on {2:}'.format(
            test_accuracy, confidence_interval_95, model_path))
def print_layer(self, layer_ordinal):
    """Log one summary row for this layer.

    The row holds the ordinal, layer type, output shape, kernel size and
    stride. The fractional ratio is appended only when ``self.frac_ratio``
    is set and its first element is not ``CM.OFF``.

    Args:
        layer_ordinal: Position of this layer in the model, printed as the
            first column.
    """
    # Identity test for None: `== None` would be evaluated element-wise if
    # frac_ratio were ever an array and then fail in the `or`.
    if self.frac_ratio is None or self.frac_ratio[0] == CM.OFF:
        print_and_log("{}\t{}\t\t{}\t\t\t\t\t\t\t\t\tK={} ; S={}",
                      layer_ordinal, self.type, self.outputT.shape,
                      self.kernelSize, self.stride)
    else:
        print_and_log(
            "{}\t{}\t\t{}\t\t\t\t\t\t\t\t\tK={} ; S={} frac_ratio={}",
            layer_ordinal, self.type, self.outputT.shape, self.kernelSize,
            self.stride, self.frac_ratio)
def test_model(model_path, load=True):
    """Run the held-out evaluation and log mean accuracy with a 95% interval.

    Args:
        model_path: Checkpoint path; restored when ``load`` is true and
            always included in the log line.
        load: Restore the checkpoint into the session before evaluating.
    """
    if load:
        saver.restore(sess, save_path=model_path)

    per_batch = run_batches(test_iterations)
    scores = np.array(per_batch)
    test_accuracy = scores.mean() * 100.0
    # 196.0 = 1.96 (95% normal quantile) * 100 (fraction -> percent).
    standard_error = scores.std() / np.sqrt(len(per_batch))
    confidence_interval_95 = 196.0 * standard_error

    summary = 'Held out accuracy: {0:5.3f} +/- {1:5.3f} on {2:}'.format(
        test_accuracy, confidence_interval_95, model_path)
    print_and_log(logfile, summary)
def main(unused_argv):
    """Train the ShapeNet view-reconstruction model and write checkpoints.

    Builds the graph (placeholders, a per-task evaluation mapped over the
    task batch, and an Adam step with gradients clipped to [-1, 1]), then
    runs ``args.iterations`` training steps. Every ``args.print_freq``
    iterations (except iteration 0) the model is scored on validation
    batches and a checkpoint is saved if the mean validation loss improved.
    A final checkpoint is written when training ends.

    The log file is closed even if graph construction or training raises.

    Args:
        unused_argv: Not used by this function.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)

    args = parse_command_line()

    logfile, checkpoint_path_validation, checkpoint_path_final = get_log_files(
        args.checkpoint_dir)

    try:
        print_and_log(logfile, "Options: %s\n" % args)

        # Load training and eval data
        data = get_data('shapenet', data_type=args.type)

        # tf placeholders
        batch_train_images = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # shot
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels()
            ],
            name='train_images')
        batch_test_images = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # num test images
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels()
            ],
            name='test_images')
        batch_train_angles = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # shot
                data.get_angle_dimensionality()
            ],
            name='train_angles')
        batch_test_angles = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # num test angles
                data.get_angle_dimensionality()
            ],
            name='test_angles')

        def evaluate_task(inputs):
            """Return [loss, log-density] for one task of the batch."""
            train_images, train_angles, test_images, test_angles = inputs
            inference_features_train = extract_features(
                images=train_images,
                output_size=args.d_theta,
                use_batch_norm=False,
                dropout_keep_prob=1.0)
            adaptation_params = shapenet_inference(inference_features_train,
                                                   train_angles, args.d_theta,
                                                   args.d_psi, args.samples)
            test_batch_size = tf.shape(test_images)[0]
            sample_log_py = []

            # loop over samples
            for n in range(args.samples):
                adaptation_vector = adaptation_params['psi_samples'][n, :, :]
                # Repeat the sampled vector once per test image.
                adaptation_inputs = tf.tile(adaptation_vector,
                                            [test_batch_size, 1])
                generated_images = generate_views(test_angles,
                                                  adaptation_inputs)
                # Compute loss
                flat_images_gt = tf.reshape(test_images, [
                    -1,
                    data.get_image_height() * data.get_image_width() *
                    data.get_image_channels()
                ])
                flat_images_gen = tf.reshape(generated_images, [
                    -1,
                    data.get_image_height() * data.get_image_width() *
                    data.get_image_channels()
                ])
                # Zero log-variance is passed to the density helper.
                log_var = tf.zeros_like(flat_images_gt)
                log_density = gaussian_log_density(flat_images_gt,
                                                   flat_images_gen, log_var)
                sample_log_py.append(tf.expand_dims(log_density, 1))
            # Average the per-sample log densities (stacked on axis 1).
            task_log_py = tf.reduce_mean(tf.concat(sample_log_py, 1), axis=1)
            task_loss = -task_log_py
            return [task_loss, task_log_py]

        # tf mapping of batch to evaluation function
        batch_output = tf.map_fn(fn=evaluate_task,
                                 elems=(batch_train_images, batch_train_angles,
                                        batch_test_images, batch_test_angles),
                                 dtype=[tf.float32, tf.float32],
                                 parallel_iterations=args.tasks_per_batch)

        # average all values across batch
        batch_losses, batch_log_densities = batch_output
        loss = tf.reduce_mean(batch_losses)
        log_likelihood = tf.reduce_mean(batch_log_densities)

        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        gvs = optimizer.compute_gradients(loss)
        # Clip each gradient element-wise; variables without a gradient are
        # dropped from the update.
        gvs = [(tf.clip_by_value(grad, -1, 1), var) for grad, var in gvs
               if grad is not None]
        train_step = optimizer.apply_gradients(gvs)

        with tf.Session() as sess:
            saver = tf.train.Saver()

            # train the model
            validation_batches = 100
            best_validation_loss = 5e10
            train_iteration_loss = []
            sess.run(tf.global_variables_initializer())
            for iteration in range(args.iterations):
                train_inputs, test_inputs, train_outputs, test_outputs = \
                    data.get_batch(source='train',
                                   tasks_per_batch=args.tasks_per_batch,
                                   shot=args.shot)
                feed_dict = {
                    batch_train_images: train_inputs,
                    batch_test_images: test_inputs,
                    batch_train_angles: train_outputs,
                    batch_test_angles: test_outputs
                }
                _, log_py, iteration_loss = sess.run(
                    [train_step, log_likelihood, loss], feed_dict)
                train_iteration_loss.append(iteration_loss)

                if (iteration > 0) and (iteration % args.print_freq == 0):
                    validation_losses = []
                    for _ in range(validation_batches):
                        train_inputs, test_inputs, train_outputs, test_outputs = \
                            data.get_batch(source='validation',
                                           tasks_per_batch=args.tasks_per_batch,
                                           shot=args.shot)
                        feed_dict = {
                            batch_train_images: train_inputs,
                            batch_test_images: test_inputs,
                            batch_train_angles: train_outputs,
                            batch_test_angles: test_outputs
                        }
                        iter_loss = sess.run([loss], feed_dict)
                        validation_losses.append(iter_loss)
                    validation_loss = np.array(validation_losses).mean()
                    # Mean training loss since the previous report.
                    train_average_loss = np.array(train_iteration_loss).mean()

                    # save checkpoint if validation is the best so far
                    if validation_loss < best_validation_loss:
                        best_validation_loss = validation_loss
                        saver.save(sess=sess,
                                   save_path=checkpoint_path_validation)

                    print_and_log(
                        logfile,
                        'Iteration: {}, Likelihood: {:5.3f}, Iteration-Train-Loss: {:5.3f},'
                        'Val-Loss: {:5.3f}'.format(iteration, log_py,
                                                   train_average_loss,
                                                   validation_loss))
                    train_iteration_loss = []

            # save the checkpoint from the final epoch
            saver.save(sess, save_path=checkpoint_path_final)
            print_and_log(
                logfile,
                'Fully-trained model saved to: {}'.format(
                    checkpoint_path_final))
            print_and_log(
                logfile,
                'Best validation loss: {:5.3f}'.format(best_validation_loss))
            print_and_log(
                logfile, 'Best validation model saved to: {}'.format(
                    checkpoint_path_validation))
    finally:
        # Release the log file on success and on failure alike.
        logfile.close()
def main(_unused_argv):
    """Train and/or test the few-shot classifier according to ``args.mode``.

    Builds the graph (Keras inputs, per-task evaluation mapped over the task
    batch, mean loss and accuracy), then inside one session:

    * if ``"train"`` occurs in ``args.mode``: trains in chunks of
      ``args.print_freq`` steps, validates after each chunk, saves a
      checkpoint whenever validation accuracy improves, and saves a final
      checkpoint at the end;
    * if ``args.mode == 'train_test'``: tests the just-trained weights and
      then the best-validation checkpoint;
    * if ``args.mode == 'test'``: tests ``args.test_model_path``.

    The log file is closed even if any of the above raises.

    Args:
        _unused_argv: Not used by this function.
    """
    logger = tf.get_logger()
    logger.setLevel(logging.ERROR)

    args = parse_command_line()

    logfile, checkpoint_path_validation, checkpoint_path_final = get_log_files(
        args.checkpoint_dir)

    try:
        print_and_log(logfile, "Options: %s\n" % args)

        # Load training and eval data
        data = get_data(args.dataset)

        # set the feature extractor based on the dataset
        if args.dataset == "Omniglot":
            feature_extractor_fn = extract_features_omniglot
        else:
            feature_extractor_fn = extract_features_mini_imagenet

        # evaluation samples
        eval_samples_train = 15
        eval_samples_test = args.shot

        # testing parameters
        test_iterations = 600
        test_args_per_batch = 1  # always use a batch size of 1 for testing

        # tf placeholders
        train_images = tf.keras.Input(  # shot, *dimensions
            [
                None,
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels()
            ],
            dtype=tf.float32,
            name='train_images')
        test_images = tf.keras.Input(  # num test images, *dimensions
            [
                None,
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels()
            ],
            dtype=tf.float32,
            name='test_images')
        train_labels = tf.keras.Input(  # shot, way
            [None, args.way],
            dtype=tf.float32,
            name='train_labels')
        test_labels = tf.keras.Input(  # num test images, way
            [None, args.way],
            dtype=tf.float32,
            name='test_labels')
        dropout_rate = tf.compat.v1.placeholder(tf.float32, [],
                                                name='dropout_rate')
        L = tf.constant(args.samples, dtype=tf.float32, name="num_samples")

        # Relevant computations for a single task
        def evaluate_task(inputs):
            """Return [loss, accuracy] for one task of the batch."""
            train_inputs, train_outputs, test_inputs, test_outputs = inputs
            with tf.compat.v1.variable_scope('shared_features'):
                # extract features from train and test data
                features_train = feature_extractor_fn(
                    images=train_inputs,
                    output_size=args.d_theta,
                    use_batch_norm=True,
                    rate=dropout_rate)
                features_test = feature_extractor_fn(
                    images=test_inputs,
                    output_size=args.d_theta,
                    use_batch_norm=True,
                    rate=dropout_rate)
            # Infer classification layer from q
            with tf.compat.v1.variable_scope('classifier'):
                classifier = infer_classifier(features_train, train_outputs,
                                              args.d_theta, args.way)

            # Local reparameterization trick
            # Compute parameters of q distribution over logits
            weight_mean = classifier['weight_mean']
            weight_log_variance = classifier['weight_log_variance']
            bias_mean = classifier["bias_mean"]
            bias_log_variance = classifier['bias_log_variance']
            logits_mean_test = tf.matmul(features_test,
                                         weight_mean) + bias_mean
            logits_log_var_test = \
                tf.math.log(tf.matmul(features_test ** 2, tf.exp(weight_log_variance)) + tf.exp(bias_log_variance))
            logits_sample_test = sample_normal(logits_mean_test,
                                               logits_log_var_test,
                                               args.samples)
            # One copy of the labels per logit sample.
            test_labels_tiled = tf.tile(tf.expand_dims(test_outputs, 0),
                                        [args.samples, 1, 1])
            task_log_py = multinoulli_log_density(inputs=test_labels_tiled,
                                                  logits=logits_sample_test)
            # log-mean-exp over the sample axis: logsumexp minus log(L).
            averaged_predictions = tf.reduce_logsumexp(
                logits_sample_test, axis=0) - tf.math.log(L)
            task_accuracy = tf.reduce_mean(
                tf.cast(
                    tf.equal(tf.argmax(test_outputs, axis=-1),
                             tf.argmax(averaged_predictions, axis=-1)),
                    tf.float32))
            task_score = tf.reduce_logsumexp(task_log_py,
                                             axis=0) - tf.math.log(L)
            task_loss = -tf.reduce_mean(task_score, axis=0)
            return [task_loss, task_accuracy]

        # tf mapping of batch to evaluation function
        batch_output = tf.map_fn(fn=evaluate_task,
                                 elems=(train_images, train_labels,
                                        test_images, test_labels),
                                 dtype=[tf.float32, tf.float32],
                                 parallel_iterations=args.tasks_per_batch)

        # average all values across batch
        batch_losses, batch_accuracies = batch_output
        loss = tf.reduce_mean(batch_losses)
        accuracy = tf.reduce_mean(batch_accuracies)

        def run_batches(num_iters, mode="test"):
            """Run ``num_iters`` batches of split ``mode`` and collect results.

            In ``"train"`` mode each batch also applies ``train_step`` and
            the result is a list of ``(loss, accuracy)`` pairs. In any other
            mode the result is a list of one-element ``[accuracy]`` lists.
            """
            outputs = []
            if mode == "test":
                batch_args = [
                    mode, test_args_per_batch, args.test_shot, args.test_way,
                    eval_samples_test
                ]
            else:
                # NOTE(review): "validation" also lands here and therefore
                # draws eval_samples_train per class - confirm intended.
                batch_args = [
                    mode, args.tasks_per_batch, args.shot, args.way,
                    eval_samples_train
                ]

            for _ in range(num_iters):
                train_inputs, test_inputs, train_outputs, test_outputs = \
                    data.get_batch(*batch_args)
                feed_dict = {
                    train_images: train_inputs,
                    test_images: test_inputs,
                    train_labels: train_outputs,
                    test_labels: test_outputs,
                    # Dropout only while training; rate is 0 otherwise.
                    dropout_rate: 1. - args.dropout if mode == "train" else 0.
                }
                if mode == "train":
                    # train_step is bound later, inside the training branch;
                    # it exists by the time this mode is used.
                    _, iter_loss, iter_acc = sess.run(
                        [train_step, loss, accuracy], feed_dict)
                    outputs.append((iter_loss, iter_acc))
                else:
                    outputs.append(sess.run([accuracy], feed_dict))
            return outputs

        gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                gpu_options=gpu_options)) as sess:
            saver = tf.compat.v1.train.Saver()

            if "train" in args.mode:
                # train the model
                optimizer = tf.compat.v1.train.AdamOptimizer(
                    learning_rate=args.learning_rate)
                train_step = optimizer.minimize(loss)

                validation_batches = 200
                best_validation_accuracy = 0.0
                sess.run(tf.compat.v1.global_variables_initializer())
                # Main training loop
                for train_loop in range(0, args.iterations, args.print_freq):
                    train_accuracies = []
                    # iter_loss keeps the last batch's loss for the log line.
                    for iter_loss, iter_accuracy in run_batches(
                            args.print_freq, mode="train"):
                        train_accuracies.append(iter_accuracy)

                    # compute accuracy on validation set
                    validation_accuracies = run_batches(validation_batches,
                                                        mode="validation")
                    validation_accuracy = np.array(
                        validation_accuracies).mean()
                    train_accuracy = np.array(train_accuracies).mean()

                    # save checkpoint if validation is the best so far
                    if validation_accuracy > best_validation_accuracy:
                        best_validation_accuracy = validation_accuracy
                        saver.save(sess=sess,
                                   save_path=checkpoint_path_validation)

                    print_and_log(
                        logfile,
                        'Iteration: {}, Loss: {:5.3f}, Train-Acc: {:5.3f}, Val-Acc: {:5.3f}'
                        .format(train_loop + args.print_freq, iter_loss,
                                train_accuracy, validation_accuracy))

                # save the checkpoint from the final epoch
                saver.save(sess, save_path=checkpoint_path_final)
                print_and_log(
                    logfile,
                    f"Fully-trained model saved to: {checkpoint_path_final}")
                print_and_log(
                    logfile,
                    f"Best validation accuracy: {best_validation_accuracy:5.3f}")
                print_and_log(
                    logfile,
                    f"Best validation model saved to: {checkpoint_path_validation}"
                )

            def test_model(model_path, load=True):
                """Log held-out accuracy (percent) with a 95% interval."""
                if load:
                    saver.restore(sess, save_path=model_path)
                test_accuracies = run_batches(test_iterations)
                test_accuracy = np.array(test_accuracies).mean() * 100.0
                # 196.0 = 1.96 * 100 (fraction -> percent).
                confidence_interval_95 = (
                    196.0 * np.array(test_accuracies).std() /
                    np.sqrt(len(test_accuracies)))
                print_and_log(
                    logfile,
                    'Held out accuracy: {0:5.3f} +/- {1:5.3f} on {2:}'.format(
                        test_accuracy, confidence_interval_95, model_path))

            if args.mode == 'train_test':
                print_and_log(
                    logfile,
                    'Train Shot: {0:d}, Train Way: {1:d}, Test Shot {2:d}, Test Way {3:d}'
                    .format(args.shot, args.way, args.test_shot,
                            args.test_way))
                # test the model on the final trained model
                # no need to load the model, it was just trained
                test_model(checkpoint_path_final, load=False)

                # test the model on the best validation checkpoint so far
                test_model(checkpoint_path_validation)
            elif args.mode == 'test':
                test_model(args.test_model_path)
    finally:
        # Release the log file on success and on failure alike.
        logfile.close()
def add_layer(self,
              type,
              layerSize=None,
              activation=None,
              kernelSize=None,
              stride=None,
              varInit=None,
              dropout=None):
    """Append a layer of the given type to the model.

    The new layer takes the output tensor of the current last layer as its
    input. Any argument left as ``None`` falls back to the model-level
    default stored on ``self``.

    :type: "fit_input", "convolution", "max_pool", "normalization", "flaten_4dto2d", "dense"
    :layerSize: size of the layer (int); ``None`` or ``CM.FIT_INPUT`` reuses
        the previous layer's size
    :activation: "no_activation", "relu", "leaky_relu" ; default = "relu"
    :kernelSize: for 2D layers, the kernel size (int) ; default = 2
    :stride: for 2D layers, the stride size in each dim ([int, int]) ; default = [1, 1]
    :varInit: [bias_const, alpha_const, weight_sdev, weight_mean] ; default = [0.1, 0.2, 0.1, 0.0]
    :dropout: OFF/ dropout_value ; default = "off"

    An unknown layer type is logged and the process exits with status 1.
    """
    # `is None` (identity) rather than `== None`: list/array arguments such
    # as stride or varInit must not be compared element-wise.
    if activation is None:
        activation = self.activation
    if kernelSize is None:
        kernelSize = self.kernelSize
    if stride is None:
        stride = self.stride
    if varInit is None:
        varInit = self.varInit
    if dropout is None:
        dropout = self.dropout

    inputT = self.NNlayer[self.layer_ordinal].outputT

    if layerSize is None or layerSize == CM.FIT_INPUT:
        layerSize = self.prevLayerSize
    # Remember the size so the next layer can fit to it.
    self.prevLayerSize = layerSize

    if type == CM.CONVOLUTION:
        with tf.name_scope("{}-Convolution".format(self.layer_ordinal)):
            layerObj = convolutionLayer(inputT, layerSize, activation,
                                        kernelSize, stride, varInit, dropout)
            layerObj.create_layer()
    elif type == CM.MAX_POOL:
        with tf.name_scope("{}-Max_pool".format(self.layer_ordinal)):
            layerObj = maxPoolLayer(inputT, kernelSize, stride)
            layerObj.create_layer()
    elif type == CM.NORMALIZATION:
        with tf.name_scope("{}-Noramalization".format(self.layer_ordinal)):
            layerObj = normalizationLayer(inputT)
            layerObj.create_layer()
    elif type == CM.FLATEN_4DTO2D:
        with tf.name_scope("{}-Flaten_4Dto2D".format(self.layer_ordinal)):
            layerObj = flaten4Dto2DLayer(inputT)
            layerObj.create_layer()
    elif type == CM.DENSE:
        with tf.name_scope("{}-Dense".format(self.layer_ordinal)):
            layerObj = denseLayer(inputT, layerSize, activation, varInit,
                                  dropout)
            layerObj.create_layer()
    else:
        print_and_log("ERROR: unknown layer type {}", type)
        exit(1)

    self.NNlayer.append(layerObj)
    self.layer_ordinal += 1
def print_model_params(self):
    """Log a table of the model's layers followed by the network size.

    Each layer in ``self.NNlayer[0 .. self.layer_ordinal]`` prints its own
    row through its ``print_layer`` method.
    """
    heavy_rule = "=================================================================================="
    network_size = calc_params()

    print_and_log(heavy_rule)
    print_and_log(
        "# type shape activation dropout additional"
    )
    print_and_log(
        "------------------+-------------------+----------------+-----------+-------------------"
    )

    last_index = self.layer_ordinal
    index = 0
    while index <= last_index:
        self.NNlayer[index].print_layer(index)
        index += 1

    print_and_log(heavy_rule)
    print_and_log("Network Size: {}", network_size)
    print_and_log(heavy_rule)
def print_layer(self, layer_ordinal):
    """Log one summary row: ordinal, layer type and output shape.

    Args:
        layer_ordinal: Position of this layer in the model, printed as the
            first column.
    """
    row_template = "{}\t{}\t\t\t{}"
    columns = (layer_ordinal, self.type, self.outputT.shape)
    print_and_log(row_template, *columns)
def print_layer(self, layer_ordinal):
    """Log one summary row: ordinal, type, output shape, activation, dropout.

    Args:
        layer_ordinal: Position of this layer in the model, printed as the
            first column.
    """
    row_template = "{}\t{}\t\t\t{}\t\t{}\t\t{}"
    columns = (layer_ordinal, self.type, self.outputT.shape, self.activation,
               self.dropout_str)
    print_and_log(row_template, *columns)
def print_layer(self, layer_ordinal):
    """Log one summary row including kernel size and stride.

    Columns: ordinal, type, output shape, activation, dropout, then
    ``K=<kernelSize> ; S=<stride>``.

    Args:
        layer_ordinal: Position of this layer in the model, printed as the
            first column.
    """
    row_template = "{}\t{}\t\t{}\t\t{}\t\t\t{}\t\t\tK={} ; S={}"
    columns = (layer_ordinal, self.type, self.outputT.shape, self.activation,
               self.dropout_str, self.kernelSize, self.stride)
    print_and_log(row_template, *columns)
def print_params():
    """Log the random seed and the training hyper-parameters.

    Prints the seed between two rule lines, then the learning rate with its
    progression interval, the momentum and the training dropout value.
    """
    heavy_rule = "=================================================================================="

    print_and_log(heavy_rule)
    print_and_log("seed: {}", CM.SEED)
    print_and_log(heavy_rule)

    print_and_log("learning_rate {} every {} epochs", LEARNING_RATE,
                  LR_PROGRESSION)
    print_and_log("momentum {}", MOMENTUM)
    print_and_log("train_dropout {}", TRAIN_DROPOUT)
with tf.name_scope("test_accuracy"): tf.summary.scalar('test_accuracy', acc) merged_test = tf.summary.merge_all() # RUN PREPERATIONS saver = tf.train.Saver() # so we can save the model init_vars = tf.global_variables_initializer() feed_dict_test = { x: CIFR.test_images, y_true: CIFR.test_labels, dropout: TEST_DROPOUT, learning_rate: TEST_LR } train_dropout = TRAIN_DROPOUT # initial dropout value best_test_accuracy = BEST_ACCURACY_THRESHOLD print_and_log("{} START session:", datetime.datetime.now().strftime("%Y.%m.%d %H:%M:%S")) if TEST_ONLY == CM.ON: # TEST ALREADY TRAINED MODEL: with tf.Session() as sess: writer = tf.summary.FileWriter(TCM.ENSORBOARD_PATH, sess.graph) # RESTORE MODEL restore_model(sess, TRAINED_MODEL_NAME ) # supplying the same name - it's not a filename!! # TEST MODEL test_accuracy, test_summary = sess.run([acc, merged_test], feed_dict=feed_dict_test) writer.add_summary(test_summary, 0) print_and_log_timestamp("accuracy: {}", test_accuracy) writer.close() else:
def main(unused_argv):
    """Train and/or test the few-shot classifier according to ``args.mode``.

    Builds the graph (placeholders, per-task loss/accuracy mapped over the
    task batch, plus a second mapping that exposes test features and the
    inferred classifier parameters), then inside one session:

    * ``'train'`` / ``'train_test'``: trains for ``args.iterations`` steps,
      validating every ``args.print_freq`` iterations and checkpointing the
      best-validation and final weights;
    * ``'train_test'``: additionally tests both checkpoints;
    * ``'test'``: tests ``args.test_model_path``;
    * ``'test_celeba'``: runs the celebA evaluation on
      ``args.test_model_path`` and appends results to
      ``args.checkpoint_dir + '.txt'``.

    The log file is closed when the function ends, including on error.

    Args:
        unused_argv: Not used by this function.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)

    args = parse_command_line()

    logfile, checkpoint_path_validation, checkpoint_path_final = get_log_files(args.checkpoint_dir)

    try:
        print_and_log(logfile, "Options: %s\n" % args)

        # Load training and eval data
        data = get_data(args.dataset)

        # set the feature extractor based on the dataset
        feature_extractor_fn = extract_features_mini_imagenet
        if args.dataset == "Omniglot":
            feature_extractor_fn = extract_features_omniglot
        if args.dataset == "celebA":
            feature_extractor_fn = extract_features_celeba

        # evaluation samples
        eval_samples_train = 15
        eval_samples_test = args.shot

        # testing parameters
        test_iterations = 600
        test_args_per_batch = 1  # always use a batch size of 1 for testing

        # tf placeholders
        train_images = tf.placeholder(tf.float32, [None,  # tasks per batch
                                                   None,  # shot
                                                   data.get_image_height(),
                                                   data.get_image_width(),
                                                   data.get_image_channels()],
                                      name='train_images')
        test_images = tf.placeholder(tf.float32, [None,  # tasks per batch
                                                  None,  # num test images
                                                  data.get_image_height(),
                                                  data.get_image_width(),
                                                  data.get_image_channels()],
                                     name='test_images')
        train_labels = tf.placeholder(tf.float32, [None,  # tasks per batch
                                                   None,  # shot
                                                   args.way],
                                      name='train_labels')
        test_labels = tf.placeholder(tf.float32, [None,  # tasks per batch
                                                  None,  # num test images
                                                  args.way],
                                     name='test_labels')
        dropout_keep_prob = tf.placeholder(tf.float32, [], name='dropout_keep_prob')
        L = tf.constant(args.samples, dtype=tf.float32, name="num_samples")

        # Relevant computations for a single task
        def evaluate_task(inputs):
            """Return [loss, accuracy] for one task of the batch."""
            train_inputs, train_outputs, test_inputs, test_outputs = inputs
            with tf.variable_scope('shared_features'):
                # extract features from train and test data
                features_train = feature_extractor_fn(images=train_inputs,
                                                      output_size=args.d_theta,
                                                      use_batch_norm=True,
                                                      dropout_keep_prob=dropout_keep_prob)
                features_test = feature_extractor_fn(images=test_inputs,
                                                     output_size=args.d_theta,
                                                     use_batch_norm=True,
                                                     dropout_keep_prob=dropout_keep_prob)
            # Infer classification layer from q
            with tf.variable_scope('classifier'):
                classifier = infer_classifier(features_train, train_outputs, args.d_theta, args.way)

            # Local reparameterization trick
            # Compute parameters of q distribution over logits
            weight_mean, bias_mean = classifier['weight_mean'], classifier['bias_mean']
            weight_log_variance, bias_log_variance = classifier['weight_log_variance'], classifier['bias_log_variance']
            logits_mean_test = tf.matmul(features_test, weight_mean) + bias_mean
            logits_log_var_test = \
                tf.log(tf.matmul(features_test ** 2, tf.exp(weight_log_variance)) + tf.exp(bias_log_variance))
            logits_sample_test = sample_normal(logits_mean_test, logits_log_var_test, args.samples)
            # One copy of the labels per logit sample.
            test_labels_tiled = tf.tile(tf.expand_dims(test_outputs, 0), [args.samples, 1, 1])
            task_log_py = multinoulli_log_density(inputs=test_labels_tiled, logits=logits_sample_test)
            # log-mean-exp over the sample axis: logsumexp minus log(L).
            averaged_predictions = tf.reduce_logsumexp(logits_sample_test, axis=0) - tf.log(L)
            task_accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(test_outputs, axis=-1),
                                                            tf.argmax(averaged_predictions, axis=-1)), tf.float32))
            task_score = tf.reduce_logsumexp(task_log_py, axis=0) - tf.log(L)
            task_loss = -tf.reduce_mean(task_score, axis=0)

            return [task_loss, task_accuracy]

        # tf mapping of batch to evaluation function
        batch_output = tf.map_fn(fn=evaluate_task,
                                 elems=(train_images, train_labels, test_images, test_labels),
                                 dtype=[tf.float32, tf.float32],
                                 parallel_iterations=args.tasks_per_batch)

        # average all values across batch
        batch_losses, batch_accuracies = batch_output
        loss = tf.reduce_mean(batch_losses)
        accuracy = tf.reduce_mean(batch_accuracies)

        def evaluate_task_celeba(inputs):
            """Return test features and the inferred classifier parameters."""
            train_inputs, train_outputs, test_inputs, test_outputs = inputs
            with tf.variable_scope('shared_features'):
                # extract features from train and test data
                features_train = feature_extractor_fn(images=train_inputs,
                                                      output_size=args.d_theta,
                                                      use_batch_norm=True,
                                                      dropout_keep_prob=dropout_keep_prob)
                features_test = feature_extractor_fn(images=test_inputs,
                                                     output_size=args.d_theta,
                                                     use_batch_norm=True,
                                                     dropout_keep_prob=dropout_keep_prob)
            # Infer classification layer from q
            with tf.variable_scope('classifier'):
                classifier = infer_classifier(features_train, train_outputs, args.d_theta, args.way)

            # Local reparameterization trick
            # Compute parameters of q distribution over logits
            weight_mean, bias_mean = classifier['weight_mean'], classifier['bias_mean']
            weight_log_variance, bias_log_variance = classifier['weight_log_variance'], classifier['bias_log_variance']
            return [features_test, weight_mean, bias_mean, weight_log_variance, bias_log_variance]

        # tf mapping of batch to evaluation function
        batch_output_celeba = tf.map_fn(fn=evaluate_task_celeba,
                                        elems=(train_images, train_labels, test_images, test_labels),
                                        dtype=[tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
                                        parallel_iterations=args.tasks_per_batch)

        # per-task tensors, stacked along the task axis by map_fn
        features_test, weight_mean, bias_mean, weight_log_variance, bias_log_variance = batch_output_celeba

        with tf.Session() as sess:
            saver = tf.train.Saver()

            if args.mode == 'train' or args.mode == 'train_test':
                # train the model
                optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
                train_step = optimizer.minimize(loss)

                validation_batches = 200
                iteration = 0
                best_validation_accuracy = 0.0
                train_iteration_accuracy = []
                sess.run(tf.global_variables_initializer())
                # Main training loop
                while iteration < args.iterations:
                    train_inputs, test_inputs, train_outputs, test_outputs = \
                        data.get_batch('train', args.tasks_per_batch, args.shot, args.way, eval_samples_train)

                    feed_dict = {train_images: train_inputs, test_images: test_inputs,
                                 train_labels: train_outputs, test_labels: test_outputs,
                                 dropout_keep_prob: args.dropout}
                    _, iteration_loss, iteration_accuracy = sess.run([train_step, loss, accuracy], feed_dict)
                    train_iteration_accuracy.append(iteration_accuracy)
                    if (iteration > 0) and (iteration % args.print_freq == 0):
                        # compute accuracy on validation set
                        validation_iteration_accuracy = []
                        validation_iteration = 0
                        while validation_iteration < validation_batches:
                            train_inputs, test_inputs, train_outputs, test_outputs = \
                                data.get_batch('validation', args.tasks_per_batch, args.shot, args.way,
                                               eval_samples_test)
                            feed_dict = {train_images: train_inputs, test_images: test_inputs,
                                         train_labels: train_outputs, test_labels: test_outputs,
                                         dropout_keep_prob: 1.0}
                            iteration_accuracy = sess.run(accuracy, feed_dict)
                            validation_iteration_accuracy.append(iteration_accuracy)
                            validation_iteration += 1
                        validation_accuracy = np.array(validation_iteration_accuracy).mean()
                        # Mean training accuracy since the previous report.
                        train_accuracy = np.array(train_iteration_accuracy).mean()

                        # save checkpoint if validation is the best so far
                        if validation_accuracy > best_validation_accuracy:
                            best_validation_accuracy = validation_accuracy
                            saver.save(sess=sess, save_path=checkpoint_path_validation)

                        print_and_log(logfile, 'Iteration: {}, Loss: {:5.3f}, Train-Acc: {:5.3f}, Val-Acc: {:5.3f}'
                                      .format(iteration, iteration_loss, train_accuracy, validation_accuracy))
                        train_iteration_accuracy = []

                    iteration += 1
                # save the checkpoint from the final epoch
                saver.save(sess, save_path=checkpoint_path_final)
                print_and_log(logfile, 'Fully-trained model saved to: {}'.format(checkpoint_path_final))
                print_and_log(logfile, 'Best validation accuracy: {:5.3f}'.format(best_validation_accuracy))
                print_and_log(logfile, 'Best validation model saved to: {}'.format(checkpoint_path_validation))

            def test_model(model_path, load=True):
                """Log held-out accuracy (percent) with a 95% interval."""
                if load:
                    saver.restore(sess, save_path=model_path)
                test_iteration = 0
                test_iteration_accuracy = []
                while test_iteration < test_iterations:
                    train_inputs, test_inputs, train_outputs, test_outputs = \
                        data.get_batch('test', test_args_per_batch, args.test_shot, args.test_way,
                                       eval_samples_test)
                    feedDict = {train_images: train_inputs, test_images: test_inputs,
                                train_labels: train_outputs, test_labels: test_outputs,
                                dropout_keep_prob: 1.0}
                    iter_acc = sess.run(accuracy, feedDict)
                    test_iteration_accuracy.append(iter_acc)
                    test_iteration += 1
                test_accuracy = np.array(test_iteration_accuracy).mean() * 100.0
                # 196.0 = 1.96 * 100 (fraction -> percent).
                confidence_interval_95 = \
                    (196.0 * np.array(test_iteration_accuracy).std()) / np.sqrt(len(test_iteration_accuracy))
                print_and_log(logfile, 'Held out accuracy: {0:5.3f} +/- {1:5.3f} on {2:}'
                              .format(test_accuracy, confidence_interval_95, model_path))

            if args.mode == 'train_test':
                print_and_log(logfile, 'Train Shot: {0:d}, Train Way: {1:d}, Test Shot {2:d}, Test Way {3:d}'
                              .format(args.shot, args.way, args.test_shot, args.test_way))
                # test the model on the final trained model
                # no need to load the model, it was just trained
                test_model(checkpoint_path_final, load=False)

                # test the model on the best validation checkpoint so far
                test_model(checkpoint_path_validation)

            if args.mode == 'test':
                test_model(args.test_model_path)

            def test_celeba(model_path, load=True, path_to_save_results=''):
                """Evaluate on celebA task triplets; print and optionally save stats.

                For up to ``MAX_TEST_TASKS`` batches of three tasks, samples
                classifier parameters ``n_samples`` times, picks the task
                with the highest mean log-probability for each sample, and
                accumulates that task's accuracy and the chosen index.
                Results are printed and, when ``path_to_save_results`` is
                non-empty, appended to that file.
                """
                if load:
                    saver.restore(sess, save_path=model_path)
                test_eval_samples = 5  # number of query points
                MAX_TEST_TASKS = 100
                output_tensors = [features_test, weight_mean, bias_mean, weight_log_variance, bias_log_variance]

                all_accuracy = []
                all_clusters = []
                all_log_probs = []

                data.init_testing_params(args.test_way)
                # compute test accuracy
                n_test_examples = 0
                # cycle() repeats the index range; the loop ends via `break`.
                for i in cycle(range(data.n_test_triplets)):
                    n_test_examples += 1
                    train_inputs, test_inputs, train_outputs, test_outputs = data.get_batch(
                        'test', 3, args.test_shot, args.test_way, test_eval_samples)
                    # train_inputs, test_inputs, train_outputs, test_outputs = data.get_test_triplet_batch_new(
                    #     args.test_shot, args.test_way, test_eval_samples, i)
                    print(train_inputs.shape, test_inputs.shape, train_outputs.shape, test_outputs.shape)

                    # split out batch
                    feedDict = {train_images: train_inputs,  # [3, 2*5, 84, 84, 3] support inputs (three same things)
                                test_images: test_inputs,  # [3, 2*5, 84, 84, 3] query inputs (three different things)
                                train_labels: train_outputs,  # [3, 2*5, 2] support outputs
                                test_labels: test_outputs,  # [3, 2*5, 2] query outputs
                                dropout_keep_prob: 1.0}

                    ft, wm, bm, wlv, blv = sess.run(output_tensors, feedDict)
                    n_samples = 40
                    n_marginal = 100
                    counter = 0
                    cluster = []
                    all_preds = []
                    # weight_normal = np.random.normal(loc=wm, scale=np.sqrt(np.exp(wlv)))
                    # bias_normal = np.random.normal(loc=bm, scale=np.sqrt(np.exp(blv)))
                    print("Tested on test_example {}".format(n_test_examples))
                    while counter < n_samples:
                        counter += 1
                        # TODO: are w and b correct?
                        # NOTE(review): size=1 draws a single noise value that
                        # is broadcast over every weight - confirm intended.
                        w = wm + np.random.normal(loc=0.0, scale=1.0, size=1) * np.sqrt(np.exp(wlv))  # sample from normal(weight_mean, exp(weight_log_variance))
                        b = bm + np.random.normal(loc=0.0, scale=1.0, size=1) * np.sqrt(np.exp(blv))  # sample from normal (bias_mean, exp(bias_log_variance))
                        outputs = [np.matmul(ft, w) + b.reshape(3, 1, 2)]
                        task_probs = celeba_test_utils.np_softmax(outputs[0])
                        # Probability assigned to the true class, per task.
                        _task_log_probs = np.log(task_probs[np.where(test_outputs == 1)]).reshape([3, -1])
                        task_log_probs = np.mean(_task_log_probs, axis=-1)
                        most_likely_idx = task_log_probs.argmax()
                        cluster.append(most_likely_idx)
                        all_preds.append(outputs[0][most_likely_idx].argmax(axis=-1))
                        accuracy = celeba_test_utils.np_accuracy(outputs[0], test_outputs, average=False)
                        all_accuracy.append(accuracy[most_likely_idx].mean())
                    all_clusters.append(cluster)

                    # compute marginal for that task
                    def compute_marginal_batch(sess, input_tensors, n_samples, inputa, labela, inputb, labelb):
                        """ Compute the marginal for a group of tasks """
                        # TODO: move this function to celeba_test_utils
                        ndims = len(inputa.shape)
                        n_tasks = inputa.shape[0]
                        tile_shape = (n_samples,) + (ndims) * (1,)
                        # tile and collapse
                        inputa_tiled = np.reshape(
                            np.tile(inputa[None, :], tile_shape), (-1,) + inputa.shape[1:])
                        inputb_tiled = np.reshape(
                            np.tile(inputb[None, :], tile_shape), (-1,) + inputb.shape[1:])
                        labela_tiled = np.reshape(
                            np.tile(labela[None, :], tile_shape), (-1,) + labela.shape[1:])
                        labelb_tiled = np.reshape(
                            np.tile(labelb[None, :], tile_shape), (-1,) + labelb.shape[1:])
                        feedDict = {train_images: inputa_tiled,
                                    test_images: inputb_tiled,
                                    train_labels: labela_tiled,
                                    test_labels: labelb_tiled,
                                    dropout_keep_prob: 1.0}
                        ft, wm, bm, wlv, blv = sess.run(input_tensors, feedDict)
                        # TODO: are w and b correct?
                        w = wm + np.random.normal(loc=0.0, scale=1.0, size=1) * np.sqrt(np.exp(wlv))  # sample from normal(weight_mean, exp(weight_log_variance))
                        b = bm + np.random.normal(loc=0.0, scale=1.0, size=1) * np.sqrt(np.exp(blv))  # sample from normal (bias_mean, exp(bias_log_variance))
                        outputs = np.matmul(ft, w) + b
                        probs = celeba_test_utils.np_softmax(outputs[0].reshape([-1, args.test_way]))
                        probs = probs.reshape((n_samples, n_tasks) + outputs[0].shape[1:])
                        # average over samples
                        marginal_probs = probs.mean(axis=0)
                        preds = np.argmax(marginal_probs, axis=-1)
                        return marginal_probs, preds

                    # TODO: call celeba_test_utils.compute_marginal_batch
                    # marginal_p, _ = compute_marginal_batch(sess, output_tensors, n_marginal, train_inputs, train_outputs, test_inputs, test_outputs)
                    # all_log_probs.append(np.mean(np.log(marginal_p[np.where(test_outputs == 1)])))

                    if n_test_examples >= MAX_TEST_TASKS:
                        break

                print("Test")
                print("all_accuracy: {}".format(np.mean(all_accuracy)))
                print("all_accuracy confidence: {}".format(np.std(all_accuracy) * 1.96 / np.sqrt(len(all_accuracy))))
                # Coverage: number of distinct tasks chosen across the samples.
                coverage = [len(np.unique(cl)) for cl in all_clusters]
                print("all_coverage: {}".format(np.mean(coverage)))
                print("all_coverage confidence: {}".format(np.std(coverage) * 1.96 / np.sqrt(len(coverage))))
                # print("all_logprob: {}".format(np.mean(all_log_probs)))
                # print("all_logprobs confidence: {}".format(np.std(all_log_probs) * 1.96 / np.sqrt(len(all_log_probs))))
                if path_to_save_results != '':
                    with open(path_to_save_results, 'a') as f:
                        f.write('\n\nIteration: {0}, test axample {1}\n'.format(i, n_test_examples))
                        f.write("all_accuracy: {}\n".format(np.mean(all_accuracy)))
                        f.write("all_accuracy confidence: {}\n".format(np.std(all_accuracy) * 1.96 / np.sqrt(len(all_accuracy))))
                        f.write("all_coverage: {}\n".format(np.mean(coverage)))
                        f.write("all_coverage confidence: {}\n".format(np.std(coverage) * 1.96 / np.sqrt(len(coverage))))

                if path_to_save_results != '':
                    with open(path_to_save_results, 'a') as f:
                        f.write('Model: {0}\n'.format(path_to_save_results))

            if args.mode == 'test_celeba':
                test_celeba(args.test_model_path, path_to_save_results=args.checkpoint_dir + '.txt')
    finally:
        # Release the log file on success and on failure alike.
        logfile.close()
def main(unused_argv):
    """Build the few-shot classification graph, then train and/or test it.

    Behaviour is selected by ``args.mode`` from the parsed command line:

    * ``'train'`` / ``'train_test'``: run the training loop, validating every
      ``args.print_freq`` iterations and checkpointing both the best
      validation model and the final model.
    * ``'train_test'``: additionally evaluate the final model and the best
      validation checkpoint on the test split.
    * ``'test'``: evaluate the checkpoint at ``args.test_model_path``.

    Args:
        unused_argv: Not referenced; options come from ``parse_command_line()``.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)
    args = parse_command_line()
    logfile, checkpoint_path_validation, checkpoint_path_final = get_log_files(
        args.checkpoint_dir)

    # Everything that happens while the log file is open runs under
    # try/finally so the handle is closed even if graph construction,
    # training, checkpointing or testing raises.
    try:
        print_and_log(logfile, "Options: %s\n" % args)

        # Load training and eval data
        data = get_data(args.dataset)

        # testing parameters
        test_iterations = 600
        test_args_per_batch = 1  # always use a batch size of 1 for testing

        # tf placeholders
        train_images = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # shot
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels(),
            ],
            name='train_images')
        test_images = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # num test images
                data.get_image_height(),
                data.get_image_width(),
                data.get_image_channels(),
            ],
            name='test_images')
        train_labels = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # shot
                args.way,
            ],
            name='train_labels')
        test_labels = tf.placeholder(
            tf.float32,
            [
                None,  # tasks per batch
                None,  # num test images
                args.way,
            ],
            name='test_labels')
        dropout_keep_prob = tf.placeholder(
            tf.float32, [], name='dropout_keep_prob')

        def evaluate_task(inputs):
            """Return ``[loss, accuracy]`` tensors for a single task.

            ``inputs`` is the per-task slice handed over by ``tf.map_fn``:
            ``(train_inputs, train_outputs, test_inputs, test_outputs)``.
            """
            train_inputs, train_outputs, test_inputs, test_outputs = inputs

            with tf.variable_scope('shared_features'):
                # extract features from train and test data
                features_train = extract_features(
                    images=train_inputs,
                    output_size=args.d_theta,
                    use_batch_norm=True,
                    dropout_keep_prob=dropout_keep_prob)
                features_test = extract_features(
                    images=test_inputs,
                    output_size=args.d_theta,
                    use_batch_norm=True,
                    dropout_keep_prob=dropout_keep_prob)

            # Infer classification layer from q
            with tf.variable_scope('classifier'):
                classifier = infer_classifier(
                    features_train, train_outputs, args.d_theta, args.way)

            # Local reparameterization trick
            # Compute parameters of q distribution over logits
            weight_mean = classifier['weight_mean']
            bias_mean = classifier['bias_mean']
            weight_log_variance = classifier['weight_log_variance']
            bias_log_variance = classifier['bias_log_variance']
            logits_mean_test = (
                tf.matmul(features_test, weight_mean) + bias_mean)
            logits_log_var_test = tf.log(
                tf.matmul(features_test ** 2, tf.exp(weight_log_variance))
                + tf.exp(bias_log_variance))

            # Sample from q(logits) and compute expectations of accuracy and
            # log-likelihood
            logits_sample_test = sample_normal(
                logits_mean_test, logits_log_var_test, args.samples)
            # Repeat the labels once per sample so they line up with the
            # sampled logits.
            test_labels_tiled = tf.tile(
                tf.expand_dims(test_outputs, 0), [args.samples, 1, 1])
            task_log_py = tf.reduce_mean(
                tf.reduce_sum(
                    multinoulli_log_density(
                        inputs=test_labels_tiled,
                        logits=logits_sample_test),
                    axis=-1))
            task_accuracy = tf.reduce_mean(
                tf.cast(
                    tf.equal(
                        tf.argmax(test_labels_tiled, axis=-1),
                        tf.argmax(logits_sample_test, axis=-1)),
                    tf.float32))
            task_loss = -task_log_py
            return [task_loss, task_accuracy]

        # tf mapping of batch to evaluation function
        batch_output = tf.map_fn(
            fn=evaluate_task,
            elems=(train_images, train_labels, test_images, test_labels),
            dtype=[tf.float32, tf.float32],
            parallel_iterations=args.tasks_per_batch)

        # average all values across batch
        batch_losses, batch_accuracies = batch_output
        loss = tf.reduce_mean(batch_losses)
        accuracy = tf.reduce_mean(batch_accuracies)

        def next_feed_dict(split, tasks_per_batch, shot, way, keep_prob):
            """Draw one batch from ``split`` and map it onto the placeholders."""
            train_inputs, test_inputs, train_outputs, test_outputs = \
                data.get_batch(split, tasks_per_batch, shot, way)
            return {
                train_images: train_inputs,
                test_images: test_inputs,
                train_labels: train_outputs,
                test_labels: test_outputs,
                dropout_keep_prob: keep_prob,
            }

        def batch_accuracies_for(sess, split, num_batches, tasks_per_batch,
                                 shot, way):
            """Evaluate ``accuracy`` on ``num_batches`` fresh batches.

            Dropout is disabled (keep probability 1.0) for evaluation.
            """
            return [
                sess.run(
                    accuracy,
                    next_feed_dict(split, tasks_per_batch, shot, way, 1.0))
                for _ in range(num_batches)
            ]

        with tf.Session() as sess:
            # NOTE(review): the Saver is built before the optimizer below;
            # with its default variable list that presumably leaves optimizer
            # slot variables out of the checkpoints - confirm this is intended.
            saver = tf.train.Saver()

            if args.mode in ('train', 'train_test'):
                # train the model
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=args.learning_rate)
                train_step = optimizer.minimize(loss)

                validation_batches = 100
                iteration = 0
                best_validation_accuracy = 0.0
                train_iteration_accuracy = []
                sess.run(tf.global_variables_initializer())

                # Main training loop
                while iteration < args.iterations:
                    feed_dict = next_feed_dict(
                        'train', args.tasks_per_batch, args.shot, args.way,
                        args.dropout)
                    _, iteration_loss, iteration_accuracy = sess.run(
                        [train_step, loss, accuracy], feed_dict)
                    train_iteration_accuracy.append(iteration_accuracy)

                    if (iteration > 0) and (
                            iteration % args.print_freq == 0):
                        # compute accuracy on validation set
                        validation_accuracy = np.array(
                            batch_accuracies_for(
                                sess, 'validation', validation_batches,
                                args.tasks_per_batch, args.shot,
                                args.way)).mean()
                        train_accuracy = np.array(
                            train_iteration_accuracy).mean()

                        # save checkpoint if validation is the best so far
                        if validation_accuracy > best_validation_accuracy:
                            best_validation_accuracy = validation_accuracy
                            saver.save(
                                sess=sess,
                                save_path=checkpoint_path_validation)

                        print_and_log(
                            logfile,
                            'Iteration: {}, Loss: {:5.3f}, Train-Acc: {:5.3f}, Val-Acc: {:5.3f}'
                            .format(iteration, iteration_loss, train_accuracy,
                                    validation_accuracy))
                        # Training accuracy is averaged per reporting window.
                        train_iteration_accuracy = []

                    iteration += 1

                # save the checkpoint from the final epoch
                saver.save(sess, save_path=checkpoint_path_final)
                print_and_log(
                    logfile,
                    'Fully-trained model saved to: {}'.format(
                        checkpoint_path_final))
                print_and_log(
                    logfile,
                    'Best validation accuracy: {:5.3f}'.format(
                        best_validation_accuracy))
                print_and_log(
                    logfile,
                    'Best validation model saved to: {}'.format(
                        checkpoint_path_validation))

            def test_model(model_path, load=True):
                """Log held-out accuracy for ``model_path``.

                When ``load`` is true the checkpoint is restored into the
                session first; otherwise the session's current weights are
                evaluated and ``model_path`` is only used in the log message.
                """
                if load:
                    saver.restore(sess, save_path=model_path)

                test_iteration_accuracy = np.array(
                    batch_accuracies_for(
                        sess, 'test', test_iterations, test_args_per_batch,
                        args.test_shot, args.test_way))

                # Both figures are reported in percent: 196.0 == 1.96 * 100.
                test_accuracy = test_iteration_accuracy.mean() * 100.0
                confidence_interval_95 = \
                    (196.0 * test_iteration_accuracy.std()) \
                    / np.sqrt(len(test_iteration_accuracy))
                print_and_log(
                    logfile,
                    'Held out accuracy: {0:5.3f} +/- {1:5.3f} on {2:}'.format(
                        test_accuracy, confidence_interval_95, model_path))

            if args.mode == 'train_test':
                print_and_log(
                    logfile,
                    'Train Shot: {0:d}, Train Way: {1:d}, Test Shot {2:d}, Test Way {3:d}'
                    .format(args.shot, args.way, args.test_shot,
                            args.test_way))
                # test the model on the final trained model
                # no need to load the model, it was just trained
                test_model(checkpoint_path_final, load=False)

                # test the model on the best validation checkpoint so far
                # NOTE(review): that checkpoint is only written when a
                # validation pass improved on the best accuracy; if none did,
                # this restore has nothing to load - confirm runs are always
                # long enough.
                test_model(checkpoint_path_validation)

            if args.mode == 'test':
                test_model(args.test_model_path)
    finally:
        logfile.close()