Exemplo n.º 1
0
def evaluate_model(model, med_data):
    """
    Print the average unigram BLEU score of a captioning model.

    For each of the 'train' and 'val' splits a minibatch of 1000 examples is
    sampled from med_data, captions are generated with model.sample, and the
    BLEU score of each generated caption against its ground-truth caption is
    averaged over the minibatch.

    Inputs:
    - model: CaptioningRNN model; must provide sample(features).
    - med_data: Data dictionary passed to sample_coco_minibatch. It must also
      hold the 'idx_to_word' vocabulary used to decode captions.

    Returns nothing; one line per split is printed.
    """
    # Decode with the vocabulary of the data set that is being evaluated
    # rather than with an unrelated module-level `data` global.
    idx_to_word = med_data['idx_to_word']

    BLEUscores = {}
    for split in ['train', 'val']:
        gt_captions, features, _ = sample_coco_minibatch(med_data,
                                                         split=split,
                                                         batch_size=1000)
        gt_captions = decode_captions(gt_captions, idx_to_word)
        sample_captions = decode_captions(model.sample(features), idx_to_word)

        total_score = sum(
            (BLEU_score(gt_caption, sample_caption)
             for gt_caption, sample_caption in zip(gt_captions,
                                                   sample_captions)),
            0.0,
        )
        BLEUscores[split] = total_score / len(sample_captions)

    for split, score in BLEUscores.items():
        print('Average BLEU score for %s: %f' % (split, score))
Exemplo n.º 2
0
    def _evaluate_model(self, model):
        """
        Print the average unigram BLEU score of a captioning model.

        For each of the 'train' and 'val' splits a minibatch of 1000 examples
        is sampled from self.data, captions are generated with model.sample,
        and self._BLEU_score of each generated caption against its
        ground-truth caption is averaged over the minibatch.

        Inputs:
        - model: CaptioningRNN model; must provide sample(features).

        Returns nothing; one line per split is printed.
        """
        idx_to_word = self.data['idx_to_word']

        BLEUscores = {}
        for split in ['train', 'val']:
            gt_captions, features, _ = sample_coco_minibatch(self.data,
                                                             split=split,
                                                             batch_size=1000)
            gt_captions = decode_captions(gt_captions, idx_to_word)
            sample_captions = decode_captions(model.sample(features),
                                              idx_to_word)

            total_score = sum(
                (self._BLEU_score(gt_caption, sample_caption)
                 for gt_caption, sample_caption in zip(gt_captions,
                                                       sample_captions)),
                0.0,
            )
            BLEUscores[split] = total_score / len(sample_captions)

        for split, score in BLEUscores.items():
            print('Average BLEU score for %s: %f' % (split, score))

        # NOTE(review): as written this guard has no effect, because the
        # method returns None on both paths. It looks like the remainder of an
        # early-exit check on a 0.3 BLEU threshold -- confirm the intent.
        if BLEUscores['val'] > 0.3 and BLEUscores['train'] > 0.3:
            return
Exemplo n.º 3
0
def evaluate_model(data, model):
    """
    Report the mean unigram BLEU score of a captioning model.

    Inputs:
    - data: Data dictionary; read for 'word_to_idx', 'idx_to_word' and passed
      to sample_coco_minibatch.
    - model: CaptioningRNN model; must provide
      sample(features, start, end, null).

    A minibatch of 1000 examples is scored for each of the 'train' and 'val'
    splits and the per-split average is printed. Nothing is returned.
    """
    vocab = data['word_to_idx']
    start, end, null = vocab['<START>'], vocab['<END>'], vocab['<NULL>']

    BLEUscores = {}
    for split in ['train', 'val']:
        truth, features, urls = sample_coco_minibatch(data,
                                                      split=split,
                                                      batch_size=1000)
        truth = decode_captions(truth, data['idx_to_word'])

        generated = model.sample(features, start, end, null)
        generated = decode_captions(generated, data['idx_to_word'])

        # Accumulate one BLEU score per (ground truth, generated) pair.
        running = 0.0
        for reference, candidate, _ in zip(truth, generated, urls):
            running += BLEU_score(reference, candidate)

        BLEUscores[split] = running / len(generated)

    for split, score in BLEUscores.items():
        print('Average BLEU score for %s: %f' % (split, score))
Exemplo n.º 4
0
def overfit_small_data():
    """
    Overfit a vanilla RNN captioning model on a tiny training set.

    Similar to the Solver class used to train image classification models on
    the previous assignment, this assignment uses a CaptioningSolver class to
    train image captioning models (see cs231n/captioning_solver.py).

    The data is loaded with max_train=50, a CaptioningRNN with
    cell_type='rnn' is trained on it with Adam for 50 epochs, the loss history
    is plotted, and two sampled captions per split are displayed next to
    their ground-truth captions. According to the assignment notes the final
    loss should be less than 0.1.
    """
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)
    # Take the vocabulary and the feature width from the data set that is
    # actually trained on instead of from a module-level `data` global.
    word_to_idx = small_data['word_to_idx']
    idx_to_word = small_data['idx_to_word']

    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=word_to_idx,
        input_dim=small_data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )
    small_rnn_solver = CaptioningSolver(
        small_rnn_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True,
        print_every=10,
    )
    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    # Show a couple of sampled captions per split next to the ground truth.
    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(small_data,
                                                            split=split,
                                                            batch_size=2)
        gt_captions = decode_captions(gt_captions, idx_to_word)
        sample_captions = decode_captions(small_rnn_model.sample(features),
                                          idx_to_word)

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
Exemplo n.º 5
0
def overfit_lstm_captioning_model():
    """
    Overfit an LSTM captioning model on a tiny training set.

    The data is loaded with max_train=50, a CaptioningRNN with
    cell_type='lstm' is trained on it with Adam for 50 epochs, the loss
    history is plotted, and two sampled captions per split are displayed next
    to their ground-truth captions. You should see a final loss less than 0.5.
    """
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)
    # Take the vocabulary and the feature width from the data set that is
    # actually trained on instead of from a module-level `data` global.
    word_to_idx = small_data['word_to_idx']
    idx_to_word = small_data['idx_to_word']

    small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=word_to_idx,
        input_dim=small_data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    small_lstm_solver = CaptioningSolver(
        small_lstm_model,
        small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.995,
        verbose=True,
        print_every=10,
    )

    small_lstm_solver.train()

    # Plot the training losses
    plt.plot(small_lstm_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    # Show a couple of sampled captions per split next to the ground truth.
    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(small_data,
                                                            split=split,
                                                            batch_size=2)
        gt_captions = decode_captions(gt_captions, idx_to_word)
        sample_captions = decode_captions(small_lstm_model.sample(features),
                                          idx_to_word)

        for gt_caption, sample_caption, url in zip(gt_captions,
                                                   sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
Exemplo n.º 6
0
def demo(data, model):
    """
    Display sampled captions next to ground-truth captions.

    Inputs:
    - data: Data dictionary; read for 'word_to_idx', 'idx_to_word' and passed
      to sample_coco_minibatch.
    - model: Captioning model providing sample(features, start, end, null).

    For each of the 'train' and 'val' splits two examples are sampled and
    each one is shown in its own matplotlib figure.
    """
    vocab = data['word_to_idx']
    start, end, null = vocab['<START>'], vocab['<END>'], vocab['<NULL>']

    for split in ['train', 'val']:
        truth, features, urls = sample_coco_minibatch(data,
                                                      split=split,
                                                      batch_size=2)
        truth = decode_captions(truth, data['idx_to_word'])

        generated = model.sample(features, start, end, null)
        generated = decode_captions(generated, data['idx_to_word'])

        for reference, candidate, image_url in zip(truth, generated, urls):
            # Title lines: split name, generated caption, ground truth.
            heading = '%s\n%s\nGT:%s' % (split, candidate, reference)
            plt.imshow(image_from_url(image_url))
            plt.title(heading)
            plt.axis('off')
            plt.show()
    def check_bleu(self, split, num_samples, batch_size=100, check_loss=False):
        """
        Compute the average BLEU score of self.model on sampled data.

        Inputs:
        - split: String 'train' or 'val'
        - num_samples: Number of datapoints to sample from self.data and
          evaluate on.
        - batch_size: Captions are generated in chunks of this size to avoid
          using too much memory.
        - check_loss: If True, self.model.loss is also evaluated on the whole
          sampled minibatch.

        Returns:
        - bleu: Sum of the per-caption BLEU scores divided by num_samples.
          When check_loss is True the tuple (loss, bleu) is returned instead.
        """

        # Subsample the data
        captions, features, _ = sample_coco_minibatch(self.data,
                                                      batch_size=num_samples,
                                                      split=split)
        if check_loss:
            loss, _ = self.model.loss(features, captions)
        gt_captions = decode_captions(captions, self.data['idx_to_word'])

        # Number of chunks, rounding up so a partial last chunk is included
        full_batches, leftover = divmod(num_samples, batch_size)
        num_batches = full_batches + 1 if leftover != 0 else full_batches

        total_score = 0.0
        for batch_idx in range(num_batches):
            lo = batch_idx * batch_size
            hi = lo + batch_size
            generated = decode_captions(self.model.sample(features[lo:hi]),
                                        self.data['idx_to_word'])
            for reference, candidate in zip(gt_captions[lo:hi], generated):
                total_score += BLEU_score(reference, candidate)

        bleu = total_score / num_samples
        return (loss, bleu) if check_loss else bleu
Exemplo n.º 8
0
    def check_accuracy(self, model):
        """
        Record the average BLEU score of a model on the train and val splits.

        For each of the 'train' and 'val' splits a minibatch of 1000 examples
        is sampled from self.data, captions are generated with model.sample,
        and the BLEU score of each generated caption against its ground-truth
        caption is averaged over the minibatch.

        Inputs:
        - model: Captioning model; must provide sample(features).

        Returns nothing. The train average is appended to
        self.train_acc_history and the val average to self.val_acc_history.
        """
        idx_to_word = self.data['idx_to_word']

        BLEUscores = {}
        for split in ['train', 'val']:
            gt_captions, features, _ = sample_coco_minibatch(self.data,
                                                             split=split,
                                                             batch_size=1000)
            gt_captions = decode_captions(gt_captions, idx_to_word)
            sample_captions = decode_captions(model.sample(features),
                                              idx_to_word)

            total_score = sum(
                (BLEU_score(gt_caption, sample_caption)
                 for gt_caption, sample_caption in zip(gt_captions,
                                                       sample_captions)),
                0.0,
            )
            BLEUscores[split] = total_score / len(sample_captions)

        self.train_acc_history.append(BLEUscores['train'])
        self.val_acc_history.append(BLEUscores['val'])
Exemplo n.º 9
0
#           batch_size=25,
#           optim_config={
#             'learning_rate': 5e-3,
#           },
#           lr_decay=0.995,
#           verbose=True, print_every=10,
#         )
#
#small_lstm_solver.train()
#
## Plot the training losses
#plt.plot(small_lstm_solver.loss_history)
#plt.xlabel('Iteration')
#plt.ylabel('Loss')
#plt.title('Training loss history')
#plt.show()

# Show sampled captions next to the ground-truth captions for two examples
# from each split. Relies on `small_data`, `data` and `small_lstm_model`
# already being defined earlier in the script.
for split in ['train', 'val']:
    minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
    gt_captions, features, urls = minibatch
    # Turn the encoded ground-truth captions into text.
    gt_captions = decode_captions(gt_captions, data['idx_to_word'])

    # Generate captions from the image features and decode them the same way.
    sample_captions = small_lstm_model.sample(features)
    sample_captions = decode_captions(sample_captions, data['idx_to_word'])

    for gt_caption, sample_caption, url in zip(gt_captions, sample_captions,
                                               urls):
        # One figure per example; the title lists the split, the generated
        # caption and the ground truth (prefixed with "GT:").
        plt.imshow(image_from_url(url))
        plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
        plt.axis('off')
        plt.show()
Exemplo n.º 10
0
# 
# You can use the `sample_coco_minibatch` function from the file `cs231n/coco_utils.py` to sample minibatches of data from the data structure returned from `load_coco_data`. Run the following to sample a small minibatch of training data and show the images and their captions. Running it multiple times and looking at the results helps you to get a sense of the dataset.
# 
# Note that we decode the captions using the `decode_captions` function and that we download the images on-the-fly using their Flickr URL, so **you must be connected to the internet to view images**.

# In[ ]:

# Sample a minibatch and show the images and captions

# Number of examples to draw and display.
batch_size = 3

# `features` is unpacked but not used in this cell; only the captions and the
# image URLs are displayed.
captions, features, urls = sample_coco_minibatch(data, batch_size=batch_size)
for i, (caption, url) in enumerate(zip(captions, urls)):
  # One figure per example, titled with the decoded caption.
  plt.imshow(image_from_url(url))
  plt.axis('off')
  caption_str = decode_captions(caption, data['idx_to_word'])
  plt.title(caption_str)
  plt.show()


# # Recurrent Neural Networks
# As discussed in lecture, we will use recurrent neural network (RNN) language models for image captioning. 
# The file `cs231n/rnn_layers.py` contains implementations of different layer types that are needed for recurrent 
# neural networks, and the file `cs231n/classifiers/rnn.py` uses these layers to implement an image captioning model.
# 
# We will first implement different types of RNN layers in `cs231n/rnn_layers.py`.

# # Vanilla RNN: step forward
# Open the file `cs231n/rnn_layers.py`. This file implements the forward and backward passes for different 
# types of layers that are commonly used in recurrent neural networks.
# 
Exemplo n.º 11
0
def rel_error(x, y):
    """Return the largest element-wise relative error between x and y."""
    abs_diff = np.abs(x - y)
    # Floor the denominator at 1e-8 so that x == y == 0 does not divide by 0.
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)


# Load the COCO data with the pca_features flag enabled.
data = load_coco_data(pca_features=True)

# Debugging aid: look up every entry of data['train_image_idxs'] that equals
# img_num and print those positions.
# NOTE(review): presumably each position is a caption index and the value is
# the index of the image it describes -- confirm against load_coco_data.
img_num = 13945
A = np.where(np.isin(data['train_image_idxs'], img_num))[0]
print(A.tolist())

# Show the image stored at that index of data['train_urls'].
plt.imshow(image_from_url(data['train_urls'][img_num]))
plt.axis('off')
plt.show()
# Print the decoded caption for every matching position.
for i in A:
    caption_str = decode_captions(data['train_captions'][i],
                                  data['idx_to_word'])
    print(caption_str)
# The script stops here; nothing below this call is executed.
exit()

# for k, v in data.items():
#     if type(v) == np.ndarray:
#         print(k, type(v), v.shape, v.dtype)
#     else:
#         print(k, type(v), len(v))

# Sanity check for temporal softmax loss

# Fixed seed so the check is reproducible.
np.random.seed(231)

max_train = 10000
batch_size = 128
Exemplo n.º 12
0
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython


def rel_error(x, y):
    """Return the largest element-wise relative error between x and y."""
    numerator = np.abs(x - y)
    # The 1e-8 floor keeps the ratio finite when both inputs are zero.
    denominator = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(numerator / denominator)


# Load COCO data from disk; this returns a dictionary
# We'll work with dimensionality-reduced features for this notebook, but feel
# free to experiment with the original features by changing the flag below.
data = load_coco_data(pca_features=True)

# Print out all the keys and values from the data dictionary
for k, v in data.items():
    # isinstance (rather than an exact type comparison) also reports shape and
    # dtype for ndarray subclasses.
    if isinstance(v, np.ndarray):
        print(k, type(v), v.shape, v.dtype)
    else:
        print(k, type(v), len(v))

# Sample a minibatch and show the images and captions
batch_size = 3

# `features` is unpacked but not used here; only captions and URLs are shown.
captions, features, urls = sample_coco_minibatch(data, batch_size=batch_size)
for i, (caption, url) in enumerate(zip(captions, urls)):
    # One figure per example, titled with the decoded caption.
    plt.imshow(image_from_url(url))
    plt.axis('off')
    caption_str = decode_captions(caption, data['idx_to_word'])
    plt.title(caption_str)
    plt.show()