Example #1
    def __init__(self, config):
        self.config = config
        mode = config['mode']
        dataset_dir = config['dataset_dir']
        initial_lr = config['initial_lr']
        batch_size = config['batch_size']
        final_endpoint = config['final_endpoint']

        tf.logging.set_verbosity(tf.logging.INFO)

        self.learning_rate = tf.Variable(initial_lr, trainable=False)
        self.lr_rate_placeholder = tf.placeholder(tf.float32)
        self.lr_rate_assign = self.learning_rate.assign(
            self.lr_rate_placeholder)

        self.dataset = get_split_with_text(mode, dataset_dir)
        image_size = inception_v1.default_image_size
        images, _, texts, seq_lens, self.labels, _, _ = load_batch_with_text(
            self.dataset, batch_size, height=image_size, width=image_size)

        self.nb_emotions = self.dataset.num_classes
        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            self.logits, _ = inception_v1.inception_v1(
                images,
                final_endpoint=final_endpoint,
                num_classes=self.nb_emotions,
                is_training=is_training)
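
A minimal usage sketch for this constructor (the enclosing class is not shown in the snippet, so ImageModel below is a hypothetical placeholder; the config keys are the ones read above, and the values are illustrative):

config = {
    'mode': 'train',
    'dataset_dir': '/path/to/tfrecords',  # assumed location of the dataset split
    'initial_lr': 1e-3,
    'batch_size': 32,
    'final_endpoint': 'Mixed_5c',  # a standard InceptionV1 endpoint name
}
model = ImageModel(config)  # hypothetical class name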
Example #2
def oasis_evaluation(checkpoint_dir, num_classes):
    """Compute the logits of the OASIS dataset.
    
    Parameters:
        checkpoint_dir: Checkpoint of the saved model during training.
        num_classes: Number of classes.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        dataset_dir = config['dataset_dir']
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        initial_lr = config['initial_lr']
        #batch_size = config['batch_size']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        fc_size = config['fc_size']

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 1
        image_size = inception_v1.default_image_size
        images = tf.placeholder(tf.float32, [image_size, image_size, 3])
        images_prep = inception_preprocessing.preprocess_image(
            images, image_size, image_size, is_training=False)
        images_prep_final = tf.expand_dims(images_prep, 0)

        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images_prep_final,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = num_classes
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Need to convert seq_lens to int32 for stack
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size),
                     tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))
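            # Worked example: with batch_size = 1 and seq_lens = [7], the
            # stacked indices are [[0, 6]], so gather_nd picks
            # rnn_outputs[0, 6, :] -- the LSTM output at the last real
            # (non-padded) timestep of the post.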

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        # Load oasis dataset
        df_oasis = pd.read_csv('data/oasis/OASIS.csv', encoding='utf-8')

        def load_image(name):
            im_path = 'data/oasis/images/' + name.strip() + '.jpg'
            one_im = imread(im_path)
            one_im = imresize(one_im, (image_size, image_size, 3))[:, :, :3]  # drop the alpha channel if present
            return one_im

        df_oasis['image'] = df_oasis['Theme'].map(lambda x: load_image(x))

        df_oasis['Theme'] = df_oasis['Theme'].map(
            lambda x: ''.join([i for i in x if not i.isdigit()]).strip())
        # Reuse the vocabulary and word_to_id built above (which includes
        # the '<ukn>' entry for out-of-vocabulary words).
        df_oasis['text_list'], df_oasis['text_len'] = zip(
            *df_oasis['Theme'].map(lambda x: _paragraph_to_ids(
                x, word_to_id, _POST_SIZE, emotions='')))
        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:

            nb_iter = df_oasis.shape[0] // batch_size
            scores = []
            for i in range(nb_iter):
                # batch_size is 1 here: the images placeholder expects a
                # single [height, width, 3] array, so feed one image at a
                # time.
                np_image = df_oasis['image'].iloc[i]
                np_texts = np.vstack(
                    df_oasis['text_list'][(i * batch_size):((i + 1) *
                                                            batch_size)])
                np_seq_lens = df_oasis['text_len'][(
                    i * batch_size):((i + 1) * batch_size)].values
                scores.append(
                    session.run(logits,
                                feed_dict={
                                    images: np_image,
                                    texts: np_texts,
                                    seq_lens: np_seq_lens
                                }))
    scores = np.vstack(scores)
    np.save('data/oasis_logits.npy', scores)
    return scores
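
A hedged usage sketch (the checkpoint path is illustrative; num_classes must match the number of emotion classes the model was trained with):

oasis_logits = oasis_evaluation('checkpoints/run1', num_classes=6)
print(oasis_logits.shape)  # (number of OASIS images, num_classes)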
Example #3
def word_most_relevant(top_words, num_classes, checkpoint_dir):
    """Compute the logits of each word in top_words to find the words most relevant to each label.
    
    Parameters:
        top_words: Ids of the words to score.
        num_classes: Number of classes.
        checkpoint_dir: Checkpoint of the saved model during training.
    """
    with tf.Graph().as_default():
        config = _CONFIG.copy()
        mode = 'validation'
        dataset_dir = config['dataset_dir']
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        initial_lr = config['initial_lr']
        #batch_size = config['batch_size']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        fc_size = config['fc_size']

        tf.logging.set_verbosity(tf.logging.INFO)

        batch_size = 50
        image_size = inception_v1.default_image_size
        images = tf.placeholder(tf.float32,
                                [batch_size, image_size, image_size, 3])
        texts = tf.placeholder(tf.int32, [batch_size, _POST_SIZE])
        seq_lens = tf.placeholder(tf.int32, [batch_size])

        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = num_classes
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Need to convert seq_lens to int32 for stack
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size),
                     tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax

        # Initialise image
        #image_init = tf.random_normal([image_size, image_size, 3])
        #image_init = inception_preprocessing.preprocess_image(image_init, image_size, image_size, is_training=False)
        #image_init = tf.expand_dims(image_init, 0)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:

            nb_iter = len(top_words) // batch_size
            scores = []
            for i in range(nb_iter):
                np_images = np.zeros((batch_size, image_size, image_size, 3))
                np_texts = np.ones((batch_size, _POST_SIZE),
                                   dtype=np.int32) * (vocab_size - 1)
                np_texts[:, 0] = top_words[i * batch_size:(i + 1) * batch_size]
                np_seq_lens = np.ones(batch_size, dtype=np.int32)
                scores.append(
                    session.run(logits,
                                feed_dict={
                                    images: np_images,
                                    texts: np_texts,
                                    seq_lens: np_seq_lens
                                }))
    scores = np.vstack(scores)
    np.save('data/top_words_scores.npy', scores)
    np.save('data/top_words.npy', top_words)
    return scores, vocabulary, word_to_id
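
A hedged usage sketch: top_words is assumed to be an array of vocabulary ids, and because nb_iter uses integer division, any remainder smaller than batch_size (50) is silently dropped:

top_words = np.arange(5000)  # e.g. ids of the 5000 most frequent words
scores, vocabulary, word_to_id = word_most_relevant(
    top_words, num_classes=6, checkpoint_dir='checkpoints/run1')
# Word scoring highest for each label:
best_per_label = [vocabulary[top_words[j]] for j in scores.argmax(axis=0)]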
Example #4
    def __init__(self, config):
        self.config = config
        mode = config['mode']
        dataset_dir = config['dataset_dir']
        text_dir = config['text_dir']
        emb_dir = config['emb_dir']
        filename = config['filename']
        initial_lr = config['initial_lr']
        batch_size = config['batch_size']
        im_features_size = config['im_features_size']
        rnn_size = config['rnn_size']
        final_endpoint = config['final_endpoint']
        fc_size = config['fc_size']

        tf.logging.set_verbosity(tf.logging.INFO)

        self.learning_rate = tf.Variable(initial_lr, trainable=False)
        self.lr_rate_placeholder = tf.placeholder(tf.float32)
        self.lr_rate_assign = self.learning_rate.assign(
            self.lr_rate_placeholder)

        self.dataset = get_split_with_text(mode, dataset_dir)
        image_size = inception_v1.default_image_size
        images, _, texts, seq_lens, self.labels, self.post_ids, self.days = load_batch_with_text(
            self.dataset, batch_size, height=image_size, width=image_size)

        # Create the model, use the default arg scope to configure the batch norm parameters.
        is_training = (mode == 'train')
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                images,
                final_endpoint=final_endpoint,
                num_classes=im_features_size,
                is_training=is_training)

        # Text model
        vocabulary, self.embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = self.embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))
        # Unknown words = vector with zeros
        self.embedding = np.concatenate(
            [self.embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        self.nb_emotions = self.dataset.num_classes
        with tf.variable_scope('Text'):
            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [vocab_size, embedding_dim])
            self.embedding_init = W_embedding.assign(
                self.embedding_placeholder)
            input_embed = tf.nn.embedding_lookup(W_embedding, texts)
            #input_embed_dropout = tf.nn.dropout(input_embed, self.keep_prob)

            # LSTM
            cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
            rnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell, input_embed, sequence_length=seq_lens, dtype=tf.float32)
            # Need to convert seq_lens to int32 for stack
            texts_features = tf.gather_nd(
                rnn_outputs,
                tf.stack(
                    [tf.range(batch_size),
                     tf.cast(seq_lens, tf.int32) - 1],
                    axis=1))

        # Concatenate image and text features
        self.concat_features = tf.concat([images_features, texts_features],
                                         axis=1)

        # Dense layer
        W_fc = tf.get_variable('W_fc', [im_features_size + rnn_size, fc_size])
        b_fc = tf.get_variable('b_fc', [fc_size])
        dense_layer = tf.matmul(self.concat_features, W_fc) + b_fc
        dense_layer_relu = tf.nn.relu(dense_layer)

        W_softmax = tf.get_variable('W_softmax', [fc_size, self.nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [self.nb_emotions])
        self.logits = tf.matmul(dense_layer_relu, W_softmax) + b_softmax
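
After construction, the GloVe matrix still has to be pushed into the graph once through the embedding_init op defined above. A minimal sketch, assuming the enclosing class is called ImageTextModel (the real name is not shown in the snippet):

model = ImageTextModel(config)  # hypothetical class name
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed the (vocab_size, embedding_dim) GloVe matrix into W_embedding.
    sess.run(model.embedding_init,
             feed_dict={model.embedding_placeholder: model.embedding})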
Example #5
def class_visualisation(label, learning_rate, checkpoint_dir):
    """Visualise class with gradient ascent.
    
    Parameters:
        label: Label to visualise.
        learning_rate: Learning rate of the gradient ascent.
        checkpoint_dir: Checkpoint of the saved model during training.
    """
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        image_size = inception_v1.default_image_size
        image = tf.placeholder(tf.float32, [1, image_size, image_size, 3])

        # Text model
        text_dir = 'text_model'
        emb_dir = 'embedding_weights'
        filename = 'glove.6B.50d.txt'
        vocabulary, embedding = _load_embedding_weights_glove(
            text_dir, emb_dir, filename)
        vocab_size, embedding_dim = embedding.shape
        word_to_id = dict(zip(vocabulary, range(vocab_size)))

        # Create text with only unknown words
        text = tf.constant(
            np.ones((1, _POST_SIZE), dtype=np.int32) * vocab_size)

        im_features_size = 128
        # Create the model, use the default arg scope to configure the batch norm parameters.
        with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
            images_features, _ = inception_v1.inception_v1(
                image, num_classes=im_features_size, is_training=True)

        # Unknown words = vector with zeros
        embedding = np.concatenate([embedding, np.zeros((1, embedding_dim))])
        word_to_id['<ukn>'] = vocab_size

        vocab_size = len(word_to_id)
        nb_emotions = 6
        with tf.variable_scope('Text'):
            embedding_placeholder = tf.placeholder(tf.float32,
                                                   [vocab_size, embedding_dim])

            # Word embedding
            W_embedding = tf.get_variable('W_embedding',
                                          [vocab_size, embedding_dim],
                                          trainable=False)
            embedding_init = W_embedding.assign(embedding_placeholder)
            input_embed = tf.nn.embedding_lookup(W_embedding, text)
            #input_embed_dropout = tf.nn.dropout(input_embed, self.keep_prob)

            # Rescale the mean by the actual number of non-zero values.
            nb_finite = tf.reduce_sum(tf.cast(tf.not_equal(input_embed, 0.0),
                                              tf.float32),
                                      axis=1)
            # If a post has zero finite elements, replace nb_finite by 1
            nb_finite = tf.where(tf.equal(nb_finite, 0.0),
                                 tf.ones_like(nb_finite), nb_finite)
            h1 = tf.reduce_mean(input_embed, axis=1) * _POST_SIZE / nb_finite

            fc1_size = 2048
            # Fully connected layer
            W_fc1 = tf.get_variable('W_fc1', [embedding_dim, fc1_size])
            b_fc1 = tf.get_variable('b_fc1', [fc1_size])
            texts_features = tf.matmul(h1, W_fc1) + b_fc1
            texts_features = tf.nn.relu(texts_features)

        # Concatenate image and text features
        concat_features = tf.concat([images_features, texts_features], axis=1)

        W_softmax = tf.get_variable('W_softmax',
                                    [im_features_size + fc1_size, nb_emotions])
        b_softmax = tf.get_variable('b_softmax', [nb_emotions])
        logits = tf.matmul(concat_features, W_softmax) + b_softmax

        class_score = logits[:, label]
        l2_reg = 0.001
        regularisation = l2_reg * tf.square(tf.norm(image))
        obj_function = class_score - regularisation
        grad_obj_function = tf.gradients(obj_function, image)[0]
        grad_normalized = grad_obj_function / tf.norm(grad_obj_function)

        # Initialise image
        image_init = tf.random_normal([image_size, image_size, 3])
        image_init = inception_preprocessing.preprocess_image(
            image_init, image_size, image_size, is_training=False)
        image_init = tf.expand_dims(image_init, 0)

        # Load model
        checkpoint_path = tf_saver.latest_checkpoint(checkpoint_dir)
        scaffold = monitored_session.Scaffold(init_op=None,
                                              init_feed_dict=None,
                                              init_fn=None,
                                              saver=None)
        session_creator = monitored_session.ChiefSessionCreator(
            scaffold=scaffold,
            checkpoint_filename_with_path=checkpoint_path,
            master='',
            config=None)

        blur_every = 10
        max_jitter = 16
        show_every = 50
        clip_percentile = 20

        with monitored_session.MonitoredSession(
                session_creator=session_creator, hooks=None) as session:
            np_image = session.run(image_init)
            num_iterations = 500
            for i in range(num_iterations):
                # Randomly jitter the image a bit
                ox, oy = np.random.randint(-max_jitter, max_jitter + 1, 2)
                np_image = np.roll(np.roll(np_image, ox, 1), oy, 2)

                # Update image
                grad_update = session.run(grad_normalized,
                                          feed_dict={image: np_image})
                np_image += learning_rate * grad_update

                # Undo the jitter
                np_image = np.roll(np.roll(np_image, -ox, 1), -oy, 2)

                # As a regularizer, clip and periodically blur
                #np_image = np.clip(np_image, -0.2, 0.8)
                # Zero out values below the clip percentile
                min_norm = np.percentile(np_image, clip_percentile)
                np_image[np_image < min_norm] = 0.0
                if i % blur_every == 0:
                    np_image = blur_image(np_image, sigma=0.5)

                if i % show_every == 0 or i == (num_iterations - 1):
                    plt.imshow(deprocess_image(np_image[0]))
                    plt.title('Iteration %d / %d' % (i + 1, num_iterations))
                    plt.gcf().set_size_inches(4, 4)
                    plt.axis('off')
                    plt.show()
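
A usage sketch (values are illustrative; the learning rate of the ascent usually needs tuning, since the gradient is normalised before each step):

class_visualisation(label=0, learning_rate=1.0,
                    checkpoint_dir='checkpoints/run1')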