def load_batch_with_text(dataset, batch_size=32, shuffle=True, height=299, width=299,
                         is_training=False):
    """Load a single batch of data.

    Args:
      dataset: The dataset to load.
      batch_size: The number of images in the batch.
      shuffle: Whether to shuffle the data sources and common queue when reading.
      height: The height of each image after preprocessing.
      width: The width of each image after preprocessing.
      is_training: Whether we're currently training or evaluating.

    Returns:
      images: A Tensor of size [batch_size, height, width, 3], image samples that
        have been preprocessed.
      images_raw: A Tensor of size [batch_size, height, width, 3], image samples
        that can be used for visualization.
      texts: A Tensor of size [batch_size, post_size] with the word ids of each text.
      seq_lens: A Tensor of size [batch_size] with the length of each text.
      labels: A Tensor of size [batch_size], whose values range between 0 and
        dataset.num_classes - 1.
      post_ids: A Tensor of size [batch_size] with the id of each post.
      days: A Tensor of size [batch_size] with the day of each post.
    """
    # For validation, if common_queue_capacity is set lower than batch_size (the
    # validation size), the output will contain duplicates.
    data_provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, shuffle=shuffle, common_queue_capacity=batch_size,
        common_queue_min=8)
    image_raw, text, seq_len, label, post_id, day = data_provider.get(
        ['image', 'text', 'seq_len', 'label', 'post_id', 'day'])

    # Preprocess image for usage by Inception.
    image = inception_preprocessing.preprocess_image(
        image_raw, height, width, is_training=is_training)

    # Preprocess the image for display purposes.
    image_raw = tf.expand_dims(image_raw, 0)
    image_raw = tf.image.resize_images(image_raw, [height, width])
    image_raw = tf.squeeze(image_raw)

    # Batch it up.
    images, images_raw, texts, seq_lens, labels, post_ids, days = tf.train.batch(
        [image, image_raw, text, seq_len, label, post_id, day],
        batch_size=batch_size,
        num_threads=1,
        capacity=2 * batch_size)

    return images, images_raw, texts, seq_lens, labels, post_ids, days

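# A minimal usage sketch (an assumption, not part of the original pipeline): how
# the tensors returned by load_batch_with_text might be consumed with the TF 1.x
# queue-runner machinery. The `dataset` argument is assumed to come from a slim
# dataset factory (e.g. a get_split() helper); the function name below is
# hypothetical.
def _example_consume_text_batch(dataset):
    images, images_raw, texts, seq_lens, labels, post_ids, days = load_batch_with_text(
        dataset, batch_size=8, shuffle=False, is_training=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        # The queue runners keep the DatasetDataProvider queues filled in the
        # background while we pull batches.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        np_images, np_labels = sess.run([images, labels])
        coord.request_stop()
        coord.join(threads)
    return np_images, np_labels
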
def oasis_evaluation(checkpoint_dir, num_classes):
    """Compute the logits of the OASIS dataset.

    Parameters:
      checkpoint_dir: Checkpoint directory of the model saved during training.
      num_classes: Number of classes.

    Returns:
      scores: Numpy array of shape [num_images, num_classes] with the logits.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        dataset_dir = config['dataset_dir']
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        initial_lr = config['initial_lr']
        #batch_size = config['batch_size']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        # fc_size is used for the dense layer below; it is assumed to be defined
        # in _CONFIG alongside the other hyper-parameters.
        fc_size = config['fc_size']

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 1
        image_size = inception_v1.default_image_size
        images = tf.placeholder(tf.float32, [image_size, image_size, 3])
        images_prep = inception_preprocessing.preprocess_image(
            images, image_size, image_size, is_training=False)
        images_prep_final = tf.expand_dims(images_prep, 0)

        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images_prep_final, final_endpoint=final_endpoint,
                num_classes=im_features_size, is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))

        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = num_classes
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable('W_embedding', [vocab_size, embedding_dim],
                                          trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Need to convert seq_lens to int32 for stack
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack([tf.range(batch_size), tf.cast(seq_lens, tf.int32) - 1], axis=1))

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        # Load OASIS dataset
        df_oasis = pd.read_csv('data/oasis/OASIS.csv', encoding='utf-8')

        def load_image(name):
            im_path = 'data/oasis/images/' + name.strip() + '.jpg'
            one_im = imread(im_path)
            # Resize and get rid of the alpha channel if present.
            one_im = imresize(one_im, (image_size, image_size, 3))[:, :, :3]
            return one_im

        df_oasis['image'] = df_oasis['Theme'].map(lambda x: load_image(x))
        df_oasis['Theme'] = df_oasis['Theme'].map(
            lambda x: ''.join([i for i in x if not i.isdigit()]).strip())

        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        word_to_id = dict(zip(vocabulary, range(len(vocabulary))))
        df_oasis['text_list'], df_oasis['text_len'] = zip(
            *df_oasis['Theme'].map(lambda x: _paragraph_to_ids(
                x, word_to_id, _POST_SIZE, emotions='')))

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            # Integer division so the iteration count is an int under Python 3.
            nb_iter = df_oasis.shape[0] // batch_size
            scores = []
            for i in range(nb_iter):
                # The images placeholder holds a single image (batch_size is 1),
                # so feed one image per iteration.
                np_images = df_oasis['image'].iloc[i * batch_size]
                np_texts = np.vstack(
                    df_oasis['text_list'][(i * batch_size):((i + 1) * batch_size)])
                np_seq_lens = df_oasis['text_len'][
                    (i * batch_size):((i + 1) * batch_size)].values
                scores.append(session.run(
                    logits,
                    feed_dict={images: np_images, texts: np_texts, seq_lens: np_seq_lens}))
            scores = np.vstack(scores)
            np.save('data/oasis_logits.npy', scores)
        return scores

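# A short usage sketch (the checkpoint path and class count below are
# hypothetical placeholders, not taken from the original configuration).
# oasis_evaluation returns one row of logits per OASIS image; a softmax turns
# them into per-class probabilities.
def _example_oasis_probabilities():
    logits = oasis_evaluation('checkpoints/emotion_model', num_classes=6)
    # Numerically stable softmax over the class dimension.
    exp = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = exp / exp.sum(axis=1, keepdims=True)
    return probs
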
def class_visualisation(label, learning_rate, checkpoint_dir):
    """Visualise class with gradient ascent.

    Parameters:
      label: Label to visualise.
      learning_rate: Learning rate of the gradient ascent.
      checkpoint_dir: Checkpoint of the saved model during training.
    """
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)
        image_size = inception_v1.default_image_size
        image = tf.placeholder(tf.float32, [1, image_size, image_size, 3])

        # Text model
        text_dir = 'text_model'
        emb_dir = 'embedding_weights'
        filename = 'glove.6B.50d.txt'
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))

        # Create text with only unknown words
        text = tf.constant(np.ones((1, _POST_SIZE), dtype=np.int32) * vocab_size)

        im_features_size = 128
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                image, num_classes=im_features_size, is_training=True)

        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = 6
        with tf.variable_scope('Text'):
            embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embedding_dim])

            # Word embedding
            W_embedding = tf.get_variable('W_embedding', [vocab_size, embedding_dim],
                                          trainable=False)
            embedding_init = W_embedding.assign(embedding_placeholder)
            input_embed = tf.nn.embedding_lookup(W_embedding, text)
            #input_embed_dropout = tf.nn.dropout(input_embed, self.keep_prob)

            # Rescale the mean by the actual number of non-zero values.
            nb_finite = tf.reduce_sum(
                tf.cast(tf.not_equal(input_embed, 0.0), tf.float32), axis=1)
            # If a post has zero finite elements, replace nb_finite by 1
            nb_finite = tf.where(tf.equal(nb_finite, 0.0),
                                 tf.ones_like(nb_finite), nb_finite)
            h1 = tf.reduce_mean(input_embed, axis=1) * _POST_SIZE / nb_finite

            fc1_size = 2048
            # Fully connected layer
            W_fc1 = tf.get_variable('W_fc1', [embedding_dim, fc1_size])
            b_fc1 = tf.get_variable('b_fc1', [fc1_size])
            texts_features = tf.matmul(h1, W_fc1) + b_fc1
            texts_features = tf.nn.relu(texts_features)

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        W_softmax = tf.get_variable('W_softmax', [im_features_size + fc1_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(concat_features, W_softmax) + b_softmax

        class_score = logits[:, label]
        l2_reg = 0.001
        regularisation = l2_reg * tf.square(tf.norm(image))
        obj_function = class_score - regularisation
        grad_obj_function = tf.gradients(obj_function, image)[0]
        grad_normalized = grad_obj_function / tf.norm(grad_obj_function)

        # Initialise image
        image_init = tf.random_normal([image_size, image_size, 3])
        image_init = inception_preprocessing.preprocess_image(
            image_init, image_size, image_size, is_training=False)
        image_init = tf.expand_dims(image_init, 0)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(
            init_op=None, init_feed_dict=None, init_fn=None, saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        blur_every = 10
        max_jitter = 16
        show_every = 50
        clip_percentile = 20

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            np_image = session.run(image_init)
            num_iterations = 500
            for i in range(num_iterations):
                # Randomly jitter the image a bit
                ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
                np_image = np.roll(np.roll(np_image, ox, 1), oy, 2)

                # Update image
                grad_update = session.run(grad_normalized, feed_dict={image: np_image})
                np_image += learning_rate * grad_update

                # Undo the jitter
                np_image = np.roll(np.roll(np_image, -ox, 1), -oy, 2)

                # As a regularizer, clip and periodically blur
                #np_image = np.clip(np_image, -0.2, 0.8)
                # Set pixels with small norm to zero
                min_norm = np.percentile(np_image, clip_percentile)
                np_image[np_image < min_norm] = 0.0
                if i % blur_every == 0:
                    np_image = blur_image(np_image, sigma=0.5)

                if i % show_every == 0 or i == (num_iterations - 1):
                    plt.imshow(deprocess_image(np_image[0]))
                    plt.title('Iteration %d / %d' % (i + 1, num_iterations))
                    plt.gcf().set_size_inches(4, 4)
                    plt.axis('off')
                    plt.show()

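# A small driver sketch (an illustrative assumption): running the gradient
# ascent visualisation for every emotion class of a trained checkpoint. The
# checkpoint path and learning rate are hypothetical; class_visualisation
# hard-codes nb_emotions = 6, so labels 0..5 are valid.
def _example_visualise_all_classes():
    for label in range(6):
        class_visualisation(label, learning_rate=0.5,
                            checkpoint_dir='checkpoints/emotion_model')
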
def train(params):
    g = tf.Graph()
    with g.as_default(), tf.device('/cpu:0'):
        tf.set_random_seed(params['seed'])

        dataset_train = imagenet.get_split('train', params['data_dir'])
        provider_train = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
            dataset_train,
            num_readers=4,
            common_queue_capacity=20 * params['batch_size'],
            common_queue_min=10 * params['batch_size'],
        )
        [image, label] = provider_train.get(['image', 'label'])
        label -= params['labels_offset']  # [1,1000] to [0,999]
        image = inception_preprocessing.preprocess_image(
            image, params['train_image_size'], params['train_image_size'], True)
        images_train, labels_train = tf.train.batch(
            [image, label],
            batch_size=params['batch_size'],
            num_threads=4,
            capacity=5 * params['batch_size'])
        labels_train = tf.contrib.slim.one_hot_encoding(
            labels_train, dataset_train.num_classes - params['labels_offset'])

        dataset_valid = imagenet.get_split('validation', params['data_dir'], 'valid')
        provider_valid = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
            dataset_valid,
            num_readers=4,
            common_queue_capacity=20 * 100,
            common_queue_min=10 * 100,
        )
        [image, label] = provider_valid.get(['image', 'label'])
        label -= params['labels_offset']  # [1,1000] to [0,999]
        image = inception_preprocessing.preprocess_image(
            image, params['eval_image_size'], params['eval_image_size'], False)
        images_valid, labels_valid = tf.train.batch(
            [image, label], batch_size=100, num_threads=4, capacity=5 * 100)
        labels_valid = tf.contrib.slim.one_hot_encoding(
            labels_valid, dataset_valid.num_classes - params['labels_offset'])

        train_cross_entropy, train_loss, learning_rate, train_top1_accuracy, train_top5_accuracy, train_op, global_step = get_train_ops(
            images_train, labels_train, params)
        _log_variable_sizes(tf.trainable_variables(), 'Trainable Variables')
        test_cross_entropy, test_loss, test_top1_accuracy, test_top5_accuracy = get_test_ops(
            images_valid, labels_valid, params, True)

        saver = tf.train.Saver(max_to_keep=30)
        checkpoint_saver_hook = tf.train.CheckpointSaverHook(
            params['model_dir'], save_steps=params['batches_per_epoch'], saver=saver)
        hooks = [checkpoint_saver_hook]

        tf.logging.info('Starting Session')
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.train.SingularMonitoredSession(
                config=config, hooks=hooks, checkpoint_dir=params['model_dir']) as sess:
            start_time = time.time()
            calcluate_flops(g, sess)
            while True:
                run_ops = [
                    train_cross_entropy,
                    train_loss,
                    learning_rate,
                    train_top1_accuracy,
                    train_top5_accuracy,
                    train_op,
                    global_step,
                ]
                train_cross_entropy_v, train_loss_v, learning_rate_v, train_top1_accuracy_v, train_top5_accuracy_v, _, global_step_v = sess.run(
                    run_ops)
                epoch = global_step_v // params['batches_per_epoch']
                curr_time = time.time()
                if global_step_v % 100 == 0:
                    log_string = "epoch={:<6d} ".format(epoch)
                    log_string += "step={:<6d} ".format(global_step_v)
                    log_string += "cross_entropy={:<6f} ".format(train_cross_entropy_v)
                    log_string += "loss={:<6f} ".format(train_loss_v)
                    log_string += "learning_rate={:<8.4f} ".format(learning_rate_v)
                    log_string += "training_top1_accuracy={:<8.4f} ".format(
                        train_top1_accuracy_v)
                    log_string += "training_top5_accuracy={:<8.4f} ".format(
                        train_top5_accuracy_v)
                    log_string += "mins={:<10.2f}".format((curr_time - start_time) / 60)
                    tf.logging.info(log_string)
                if global_step_v % params['batches_per_epoch'] == 0:
                    test_ops = [
                        test_cross_entropy,
                        test_loss,
                        test_top1_accuracy,
                        test_top5_accuracy,
                    ]
                    test_start_time = time.time()
                    test_cross_entropy_list = []
                    test_loss_list = []
                    test_top1_accuracy_list = []
                    test_top5_accuracy_list = []
                    for _ in range(_NUM_IMAGES['test'] // 100):
                        test_cross_entropy_v, test_loss_v, test_top1_accuracy_v, test_top5_accuracy_v = sess.run(
                            test_ops)
                        test_cross_entropy_list.append(test_cross_entropy_v)
                        test_loss_list.append(test_loss_v)
                        test_top1_accuracy_list.append(test_top1_accuracy_v)
                        test_top5_accuracy_list.append(test_top5_accuracy_v)
                    test_time = time.time() - test_start_time
                    log_string = "Evaluation on test data\n"
                    log_string += "epoch={:<6d} ".format(epoch)
                    log_string += "step={:<6d} ".format(global_step_v)
                    log_string += "cross_entropy={:<6f} ".format(
                        np.mean(test_cross_entropy_list))
                    log_string += "loss={:<6f} ".format(np.mean(test_loss_list))
                    log_string += "learning_rate={:<8.6f} ".format(learning_rate_v)
                    log_string += "test_top1_accuracy={:<8.6f} ".format(
                        np.mean(test_top1_accuracy_list))
                    log_string += "test_top5_accuracy={:<8.6f} ".format(
                        np.mean(test_top5_accuracy_list))
                    log_string += "secs={:<10.2f}".format(test_time)
                    tf.logging.info(log_string)
                if epoch >= params['train_epochs']:
                    break

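# An illustrative params dictionary (an assumption: only the keys read directly
# by train() are shown; get_train_ops / get_test_ops may require additional
# entries). The paths and hyper-parameter values are placeholders.
_EXAMPLE_PARAMS = {
    'seed': 0,
    'data_dir': '/path/to/imagenet-tfrecords',
    'model_dir': '/path/to/checkpoints',
    'labels_offset': 1,          # map ImageNet labels [1, 1000] to [0, 999]
    'batch_size': 128,
    'train_image_size': 224,
    'eval_image_size': 224,
    'batches_per_epoch': 10000,  # roughly num_train_images // batch_size
    'train_epochs': 90,
}
# Example invocation: train(_EXAMPLE_PARAMS)
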