Example #1
def load_batch_with_text(dataset,
                         batch_size=32,
                         shuffle=True,
                         height=299,
                         width=299,
                         is_training=False):
    """Load a single batch of data.

    Args:
      dataset: The dataset to load.
      batch_size: The number of images in the batch.
      shuffle: Whether to shuffle the data sources and common queue when reading.
      height: The size of each image after preprocessing.
      width: The size of each image after preprocessing.
      is_training: Whether we're in training mode (True) or evaluation mode (False).

    Returns:
      images: A Tensor of size [batch_size, height, width, 3], image samples that have been preprocessed.
      images_raw: A Tensor of size [batch_size, height, width, 3], image samples that can be used for visualization.
      texts: A Tensor of size [batch_size, ...] with the tokenized text of each sample.
      seq_lens: A Tensor of size [batch_size] with the length of each text sequence.
      labels: A Tensor of size [batch_size], whose values range between 0 and dataset.num_classes - 1.
      post_ids: A Tensor of size [batch_size] with the id of each post.
      days: A Tensor of size [batch_size] with the day of each post.
    """
    # For validation, if common_queue_capacity is set lower than batch_size
    # (the validation set size), the output will contain duplicates.
    data_provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity=batch_size,
        common_queue_min=8)
    image_raw, text, seq_len, label, post_id, day = data_provider.get(
        ['image', 'text', 'seq_len', 'label', 'post_id', 'day'])

    # Preprocess image for usage by Inception.
    image = inception_preprocessing.preprocess_image(image_raw,
                                                     height,
                                                     width,
                                                     is_training=is_training)

    # Preprocess the image for display purposes.
    image_raw = tf.expand_dims(image_raw, 0)
    image_raw = tf.image.resize_images(image_raw, [height, width])
    image_raw = tf.squeeze(image_raw)

    # Batch it up.
    images, images_raw, texts, seq_lens, labels, post_ids, days = tf.train.batch(
        [image, image_raw, text, seq_len, label, post_id, day],
        batch_size=batch_size,
        num_threads=1,
        capacity=2 * batch_size)

    return images, images_raw, texts, seq_lens, labels, post_ids, days
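
A minimal usage sketch for load_batch_with_text. The get_split helper and the dataset path are illustrative assumptions, not part of the code above:

import tensorflow as tf

with tf.Graph().as_default():
    # Hypothetical: get_split returns a slim Dataset that exposes the
    # 'image', 'text', 'seq_len', 'label', 'post_id' and 'day' items.
    dataset = get_split('train', '/path/to/tfrecords')
    images, images_raw, texts, seq_lens, labels, post_ids, days = \
        load_batch_with_text(dataset, batch_size=32, is_training=True)

    # MonitoredTrainingSession initialises variables and starts the queue
    # runners needed by DatasetDataProvider and tf.train.batch.
    with tf.train.MonitoredTrainingSession() as sess:
        np_images, np_labels = sess.run([images, labels])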
Example #2
def oasis_evaluation(checkpoint_dir, num_classes):
    """Compute the logits of the OASIS dataset.
    
    Parameters:
        checkpoint_dir: Checkpoint of the saved model during training.
        num_classes: Number of classes.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        dataset_dir = config['dataset_dir']
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        initial_lr = config['initial_lr']
        #batch_size = config['batch_size']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        # fc_size sizes the dense layer below; assumed to be provided in _CONFIG.
        fc_size = config['fc_size']
        final_endpoint = config['final_endpoint']

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 1
        image_size = inception_v1.default_image_size
        images = tf.placeholder(tf.float32, [image_size, image_size, 3])
        images_prep = inception_preprocessing.preprocess_image(
            images, image_size, image_size, is_training=False)
        images_prep_final = tf.expand_dims(images_prep, 0)

        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images_prep_final,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = num_classes
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Take the RNN output at the last valid time step of each sequence.
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size),
                     tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        # Load oasis dataset
        df_oasis = pd.read_csv('data/oasis/OASIS.csv', encoding='utf-8')

        def load_image(name):
            im_path = 'data/oasis/images/' + name.strip() + '.jpg'
            one_im = imread(im_path)
            # Resize and drop the alpha channel if present.
            one_im = imresize(one_im,
                              (image_size, image_size, 3))[:, :, :3]
            return one_im

        df_oasis['image'] = df_oasis['Theme'].map(lambda x: load_image(x))

        df_oasis['Theme'] = df_oasis['Theme'].map(
            lambda x: ''.join([i for i in x if not i.isdigit()]).strip())
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        word_to_id = dict(zip(vocabulary, range(len(vocabulary))))
        df_oasis['text_list'], df_oasis['text_len'] = zip(
            *df_oasis['Theme'].map(lambda x: _paragraph_to_ids(
                x, word_to_id, _POST_SIZE, emotions='')))
        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:

            # Integer division so range() receives an int under Python 3.
            nb_iter = df_oasis.shape[0] // batch_size
            scores = []
            for i in range(nb_iter):
                # batch_size is 1: feed a single image and a one-row text batch.
                np_image = df_oasis['image'].iloc[i * batch_size]
                np_texts = np.vstack(
                    df_oasis['text_list'][(i * batch_size):((i + 1) *
                                                            batch_size)])
                np_seq_lens = df_oasis['text_len'][(
                    i * batch_size):((i + 1) * batch_size)].values
                # Evaluate the logits with all inputs fed in a single run call.
                scores.append(
                    session.run(logits,
                                feed_dict={
                                    images: np_image,
                                    texts: np_texts,
                                    seq_lens: np_seq_lens
                                }))
    scores = np.vstack(scores)
    np.save('data/oasis_logits.npy', scores)
    return scores
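
A minimal invocation sketch; the checkpoint directory and the class count are illustrative assumptions:

# Restores the latest checkpoint from 'checkpoints/', scores every OASIS row,
# saves the logits to data/oasis_logits.npy and returns them as a numpy array.
oasis_logits = oasis_evaluation('checkpoints/', num_classes=6)
print(oasis_logits.shape)  # (number of OASIS rows, num_classes)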
Example #3
def class_visualisation(label, learning_rate, checkpoint_dir):
    """Visualise class with gradient ascent.
    
    Parameters:
        label: Label to visualise.
        learning_rate: Learning rate of the gradient ascent.
        checkpoint_dir: Checkpoint of the saved model during training.
    """
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        image_size = inception_v1.default_image_size
        image = tf.placeholder(tf.float32, [1, image_size, image_size, 3])

        # Text model
        text_dir = 'text_model'
        emb_dir = 'embedding_weights'
        filename = 'glove.6B.50d.txt'
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))

        # Create text with only unknown words
        text = tf.constant(
            np.ones((1, _POST_SIZE), dtype=np.int32) * vocab_size)

        im_features_size = 128
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                image, num_classes=im_features_size, is_training=True)

        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = 6
        with tf.variable_scope('Text'):
            embedding_placeholder = tf.placeholder(tf.float32,
                                                   [vocab_size, embedding_dim])

            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            embedding_init = W_embedding.assign(embedding_placeholder)
            input_embed = tf.nn.embedding_lookup(W_embedding, text)
            #input_embed_dropout = tf.nn.dropout(input_embed, self.keep_prob)

            # The <ukn> embedding is the zero vector, so a plain mean over the
            # _POST_SIZE positions underestimates the contribution of real words;
            # rescale by the number of non-zero values to get the mean over them.
            nb_finite = tf.reduce_sum(tf.cast(tf.not_equal(input_embed, 0.0),
                                              tf.float32),
                                      axis=1)
            # If a post has zero finite elements, replace nb_finite by 1
            nb_finite = tf.where(tf.equal(nb_finite, 0.0),
                                 tf.ones_like(nb_finite), nb_finite)
            h1 = tf.reduce_mean(input_embed, axis=1) * _POST_SIZE / nb_finite

            fc1_size = 2048
            # Fully connected layer
            W_fc1 = tf.get_variable('W_fc1', [embedding_dim, fc1_size])
            b_fc1 = tf.get_variable('b_fc1', [fc1_size])
            texts_features = tf.matmul(h1, W_fc1) + b_fc1
            texts_features = tf.nn.relu(texts_features)

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        W_softmax = tf.get_variable('W_softmax',
                                    [im_features_size + fc1_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(concat_features, W_softmax) + b_softmax

        class_score = logits[:, label]
        l2_reg = 0.001
        regularisation = l2_reg * tf.square(tf.norm(image))
        obj_function = class_score - regularisation
        grad_obj_function = tf.gradients(obj_function, image)[0]
        grad_normalized = grad_obj_function / tf.norm(grad_obj_function)

        # Initialise image
        image_init = tf.random_normal([image_size, image_size, 3])
        image_init = inception_preprocessing.preprocess_image(
            image_init, image_size, image_size, is_training=False)
        image_init = tf.expand_dims(image_init, 0)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        blur_every = 10
        max_jitter = 16
        show_every = 50
        clip_percentile = 20

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            np_image = session.run(image_init)
            num_iterations = 500
            for i in range(num_iterations):
                # Randomly jitter the image a bit
                ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
                np_image = np.roll(np.roll(np_image, ox, 1), oy, 2)

                # Update image
                grad_update = session.run(grad_normalized,
                                          feed_dict={image: np_image})
                np_image += learning_rate * grad_update

                # Undo the jitter
                np_image = np.roll(np.roll(np_image, -ox, 1), -oy, 2)

                # As a regularizer, clip and periodically blur
                #np_image = np.clip(np_image, -0.2, 0.8)
                # Zero out values below the clip_percentile-th percentile
                min_norm = np.percentile(np_image, clip_percentile)
                np_image[np_image < min_norm] = 0.0
                if i % blur_every == 0:
                    np_image = blur_image(np_image, sigma=0.5)

                if i % show_every == 0 or i == (num_iterations - 1):
                    plt.imshow(deprocess_image(np_image[0]))
                    plt.title('Iteration %d / %d' % (i + 1, num_iterations))
                    plt.gcf().set_size_inches(4, 4)
                    plt.axis('off')
                    plt.show()
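
The loop above calls blur_image and deprocess_image, which are defined elsewhere. A minimal sketch of what they might look like, assuming Inception-style preprocessing that maps pixels to [-1, 1]; both bodies are assumptions rather than the original implementations:

import numpy as np
from scipy.ndimage import gaussian_filter

def blur_image(image, sigma=0.5):
    """Gaussian-blur the spatial dimensions of a [1, height, width, 3] image."""
    # Smooth height and width only; leave the batch and channel axes untouched.
    return gaussian_filter(image, sigma=[0, sigma, sigma, 0])

def deprocess_image(image):
    """Map an Inception-preprocessed image from [-1, 1] back to uint8 [0, 255]."""
    image = (image + 1.0) / 2.0
    return np.clip(255.0 * image, 0.0, 255.0).astype(np.uint8)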
Example #4
def train(params):
    g = tf.Graph()
    with g.as_default(), tf.device('/cpu:0'):
        tf.set_random_seed(params['seed'])
        dataset_train = imagenet.get_split('train', params['data_dir'])
        provider_train = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
            dataset_train,
            num_readers=4,
            common_queue_capacity=20 * params['batch_size'],
            common_queue_min=10 * params['batch_size'],
        )
        [image, label] = provider_train.get(['image', 'label'])
        label -= params['labels_offset']  # [1,1000] to [0,999]
        image = inception_preprocessing.preprocess_image(
            image, params['train_image_size'], params['train_image_size'],
            True)
        images_train, labels_train = tf.train.batch(
            [image, label],
            batch_size=params['batch_size'],
            num_threads=4,
            capacity=5 * params['batch_size'])
        labels_train = tf.contrib.slim.one_hot_encoding(
            labels_train, dataset_train.num_classes - params['labels_offset'])

        dataset_valid = imagenet.get_split('validation', params['data_dir'],
                                           'valid')
        provider_valid = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
            dataset_valid,
            num_readers=4,
            common_queue_capacity=20 * 100,
            common_queue_min=10 * 100,
        )
        [image, label] = provider_valid.get(['image', 'label'])
        label -= params['labels_offset']  # [1,1000] to [0,999]
        image = inception_preprocessing.preprocess_image(
            image, params['eval_image_size'], params['eval_image_size'], False)
        images_valid, labels_valid = tf.train.batch([image, label],
                                                    batch_size=100,
                                                    num_threads=4,
                                                    capacity=5 * 100)
        labels_valid = tf.contrib.slim.one_hot_encoding(
            labels_valid, dataset_valid.num_classes - params['labels_offset'])

        train_cross_entropy, train_loss, learning_rate, train_top1_accuracy, train_top5_accuracy, train_op, global_step = get_train_ops(
            images_train, labels_train, params)
        _log_variable_sizes(tf.trainable_variables(), 'Trainable Variables')
        test_cross_entropy, test_loss, test_top1_accuracy, test_top5_accuracy = get_test_ops(
            images_valid, labels_valid, params, True)
        saver = tf.train.Saver(max_to_keep=30)
        checkpoint_saver_hook = tf.train.CheckpointSaverHook(
            params['model_dir'],
            save_steps=params['batches_per_epoch'],
            saver=saver)
        hooks = [checkpoint_saver_hook]
        tf.logging.info('Starting Session')
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.train.SingularMonitoredSession(
                config=config, hooks=hooks,
                checkpoint_dir=params['model_dir']) as sess:
            start_time = time.time()
            calcluate_flops(g, sess)
            while True:
                run_ops = [
                    train_cross_entropy, train_loss, learning_rate,
                    train_top1_accuracy, train_top5_accuracy, train_op,
                    global_step
                ]
                train_cross_entropy_v, train_loss_v, learning_rate_v, train_top1_accuracy_v, train_top5_accuracy_v, _, global_step_v = sess.run(
                    run_ops)

                epoch = global_step_v // params['batches_per_epoch']
                curr_time = time.time()
                if global_step_v % 100 == 0:
                    log_string = "epoch={:<6d} ".format(epoch)
                    log_string += "step={:<6d} ".format(global_step_v)
                    log_string += "cross_entropy={:<6f} ".format(
                        train_cross_entropy_v)
                    log_string += "loss={:<6f} ".format(train_loss_v)
                    log_string += "learning_rate={:<8.4f} ".format(
                        learning_rate_v)
                    log_string += "training_top1_accuracy={:<8.4f} ".format(
                        train_top1_accuracy_v)
                    log_string += "training_top5_accuracy={:<8.4f} ".format(
                        train_top5_accuracy_v)
                    log_string += "mins={:<10.2f}".format(
                        (curr_time - start_time) / 60)
                    tf.logging.info(log_string)
                if global_step_v % params['batches_per_epoch'] == 0:
                    test_ops = [
                        test_cross_entropy,
                        test_loss,
                        test_top1_accuracy,
                        test_top5_accuracy,
                    ]
                    test_start_time = time.time()
                    test_cross_entropy_list = []
                    test_loss_list = []
                    test_top1_accuracy_list = []
                    test_top5_accuracy_list = []
                    for _ in range(_NUM_IMAGES['test'] // 100):
                        test_cross_entropy_v, test_loss_v, test_top1_accuracy_v, test_top5_accuracy_v = sess.run(
                            test_ops)
                        test_cross_entropy_list.append(test_cross_entropy_v)
                        test_loss_list.append(test_loss_v)
                        test_top1_accuracy_list.append(test_top1_accuracy_v)
                        test_top5_accuracy_list.append(test_top5_accuracy_v)
                    test_time = time.time() - test_start_time
                    log_string = "Evaluation on test data\n"
                    log_string += "epoch={:<6d} ".format(epoch)
                    log_string += "step={:<6d} ".format(global_step_v)
                    log_string += "cross_entropy={:<6f} ".format(
                        np.mean(test_cross_entropy_list))
                    log_string += "loss={:<6f} ".format(
                        np.mean(test_loss_list))
                    log_string += "learning_rate={:<8.6f} ".format(
                        learning_rate_v)
                    log_string += "test_top1_accuracy={:<8.6f} ".format(
                        np.mean(test_top1_accuracy_list))
                    log_string += "test_top5_accuracy={:<8.6f} ".format(
                        np.mean(test_top5_accuracy_list))
                    log_string += "secs={:<10.2f}".format((test_time))
                    tf.logging.info(log_string)
                if epoch >= params['train_epochs']:
                    break
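
train reads its configuration from the params dict. A sketch of a plausible configuration, where every value is an illustrative assumption and get_train_ops / get_test_ops may require additional keys:

params = {
    'seed': 0,
    'data_dir': '/path/to/imagenet-tfrecords',
    'model_dir': '/path/to/model_dir',
    'batch_size': 128,
    'labels_offset': 1,          # shift labels from [1, 1000] to [0, 999]
    'train_image_size': 224,
    'eval_image_size': 224,
    'batches_per_epoch': 10000,  # roughly 1.28M training images / batch_size
    'train_epochs': 90,
}
train(params)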