def train(self): """ Run optimization to train the model. """ num_train = self.data['train_captions'].shape[0] iterations_per_epoch = max(num_train / self.batch_size, 1) num_iterations = self.num_epochs * iterations_per_epoch for t in xrange(num_iterations): self._step() # Maybe print training loss if self.verbose and t % self.print_every == 0: print '(Iteration %d / %d) loss: %f' % ( t + 1, num_iterations, self.loss_history[-1]) # At the end of every epoch, increment the epoch counter and decay the # learning rate. epoch_end = (t + 1) % iterations_per_epoch == 0 if epoch_end: self.epoch += 1 for k in self.optim_configs: self.optim_configs[k]['learning_rate'] *= self.lr_decay # Check train and val accuracy on the first iteration, the last # iteration, and at the end of each epoch. # TODO: Implement some logic to check Bleu on validation set periodically first_it = (t == 0) last_it = (t == num_iterations + 1) if first_it or last_it or epoch_end: trainCaptions, trainFeatures, _ = sample_coco_minibatch(self.data, batch_size=100, split='train') valCaptions, valFeatures, _ = sample_coco_minibatch(self.data, batch_size=100, split='val') train_acc = self.check_accuracy(trainCaptions, trainFeatures) val_acc = self.check_accuracy(valCaptions, valFeatures) self.train_acc_history.append(train_acc) self.val_acc_history.append(val_acc) if self.verbose: print '(Epoch %d / %d) train acc: %f; val_acc: %f' % ( self.epoch, self.num_epochs, train_acc, val_acc) # Keep track of the best model if val_acc > self.best_val_acc: self.best_val_acc = val_acc self.best_params = {} for k, v in self.model.params.iteritems(): self.best_params[k] = v.copy() # At the end of training swap the best params into the model self.model.params = self.best_params
def train(self): """ Run optimization to train the model. """ num_train = self.data["train_captions"].shape[0] iterations_per_epoch = max(num_train / self.batch_size, 1) num_iterations = self.num_epochs * iterations_per_epoch for t in xrange(num_iterations): self._step() # Maybe print training loss if self.verbose and t % self.print_every == 0: print "(Iteration %d / %d) loss: %f" % (t + 1, num_iterations, self.loss_history[-1]) # At the end of every epoch, increment the epoch counter and decay the # learning rate. epoch_end = (t + 1) % iterations_per_epoch == 0 if epoch_end: self.epoch += 1 for k in self.optim_configs: self.optim_configs[k]["learning_rate"] *= self.lr_decay # Check train and val accuracy on the first iteration, the last # iteration, and at the end of each epoch. # TODO: Implement some logic to check Bleu on validation set periodically first_it = t == 0 last_it = t == num_iterations + 1 if first_it or last_it or epoch_end: trainCaptions, trainFeatures, _ = sample_coco_minibatch(self.data, batch_size=100, split="train") valCaptions, valFeatures, _ = sample_coco_minibatch(self.data, batch_size=100, split="val") train_acc = self.check_accuracy(trainCaptions, trainFeatures) val_acc = self.check_accuracy(valCaptions, valFeatures) self.train_acc_history.append(train_acc) self.val_acc_history.append(val_acc) if self.verbose: print "(Epoch %d / %d) train acc: %f; val_acc: %f" % ( self.epoch, self.num_epochs, train_acc, val_acc, ) # Keep track of the best model if val_acc > self.best_val_acc: self.best_val_acc = val_acc self.best_params = {} for k, v in self.model.params.iteritems(): self.best_params[k] = v.copy() # At the end of training swap the best params into the model self.model.params = self.best_params
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatch = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='train') captions, features, urls = minibatch # Compute loss and gradient loss, grads = self.model.loss(features, captions) self.loss_history.append(loss) # Perform a parameter update for p, w in self.model.params.iteritems(): dw = grads[p] config = self.optim_configs[p] try: assert w.shape==dw.shape except: print (p, w.shape, dw.shape) raise Exception next_w, next_config = self.update_rule(w, dw, config) self.model.params[p] = next_w self.optim_configs[p] = next_config
def evaluate_model(data, model):
    """
    model: CaptioningRNN model
    Prints unigram BLEU score averaged over 1000 training and val examples.
    """
    start = data['word_to_idx']['<START>']
    end = data['word_to_idx']['<END>']
    null = data['word_to_idx']['<NULL>']

    BLEUscores = {}
    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(data, split=split, batch_size=1000)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = model.sample(features, start, end, null)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        total_score = 0.0
        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            total_score += BLEU_score(gt_caption, sample_caption)
        BLEUscores[split] = total_score / len(sample_captions)

    for split in BLEUscores:
        print('Average BLEU score for %s: %f' % (split, BLEUscores[split]))
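# Several snippets call a BLEU_score(gt_caption, sample_caption) helper that
# is never shown. A minimal sketch of a unigram BLEU, assuming both arguments
# are decoded, space-separated caption strings; the special-token filtering
# and the brevity penalty are assumptions, not the assignment's exact
# implementation:
import numpy as np

def BLEU_score(gt_caption, sample_caption):
    """Unigram BLEU: clipped precision of sampled words against the ground
    truth, times a brevity penalty. Skipping special tokens is an assumption
    about the decoded caption format.
    """
    special = {'<START>', '<END>', '<NULL>', '<UNK>'}
    ref = [w for w in gt_caption.split() if w not in special]
    hyp = [w for w in sample_caption.split() if w not in special]
    if len(hyp) == 0:
        return 0.0
    # Clipped unigram matches: each reference word can be matched only once.
    ref_counts = {}
    for w in ref:
        ref_counts[w] = ref_counts.get(w, 0) + 1
    matches = 0
    for w in hyp:
        if ref_counts.get(w, 0) > 0:
            matches += 1
            ref_counts[w] -= 1
    precision = matches / len(hyp)
    # Brevity penalty discourages very short samples.
    bp = 1.0 if len(hyp) >= len(ref) else np.exp(1.0 - len(ref) / len(hyp))
    return bp * precision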
def evaluate_model(model, med_data):
    """
    model: CaptioningRNN model
    Prints unigram BLEU score averaged over 1000 training and val examples.
    """
    BLEUscores = {}
    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(med_data, split=split, batch_size=1000)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, med_data['idx_to_word'])

        sample_captions = model.sample(features)
        sample_captions = decode_captions(sample_captions, med_data['idx_to_word'])

        total_score = 0.0
        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            total_score += BLEU_score(gt_caption, sample_caption)
        BLEUscores[split] = total_score / len(sample_captions)

    for split in BLEUscores:
        print('Average BLEU score for %s: %f' % (split, BLEUscores[split]))
def _evaluate_model(self, model):
    """
    model: CaptioningRNN model
    Prints unigram BLEU score averaged over 1000 training and val examples.
    """
    BLEUscores = {}
    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(self.data, split=split, batch_size=1000)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, self.data['idx_to_word'])

        sample_captions = model.sample(features)
        sample_captions = decode_captions(sample_captions, self.data['idx_to_word'])

        total_score = 0.0
        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            total_score += self._BLEU_score(gt_caption, sample_caption)
        BLEUscores[split] = total_score / len(sample_captions)

    for split in BLEUscores:
        print('Average BLEU score for %s: %f' % (split, BLEUscores[split]))

    # Stop once both splits reach a reasonable BLEU score.
    if BLEUscores['val'] > 0.3 and BLEUscores['train'] > 0.3:
        return
def overfit_small_data():
    """
    Similar to the Solver class that we used to train image classification
    models on the previous assignment, on this assignment we use a
    CaptioningSolver class to train image captioning models. Open the file
    cs231n/captioning_solver.py and read through the CaptioningSolver class;
    it should look very familiar.

    Once you have familiarized yourself with the API, run the following to
    make sure your model overfits a small sample of 50 training examples.
    You should see a final loss of less than 0.1.
    """
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)

    small_rnn_model = CaptioningRNN(
        cell_type='rnn',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
    )

    small_rnn_solver = CaptioningSolver(
        small_rnn_model, small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.95,
        verbose=True, print_every=10,
    )

    small_rnn_solver.train()

    # Plot the training losses
    plt.plot(small_rnn_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        gt_captions, features, urls = sample_coco_minibatch(small_data, split=split, batch_size=2)
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_rnn_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
def _step(self):
    # Sample a minibatch of training data
    captions, image_features, urls = sample_coco_minibatch(
        data=self.data, batch_size=self.batch_size, split='train')

    # Compute loss and gradients, then update every parameter in place
    loss, grads = self.model.loss(image_features, captions)
    self.loss_history.append(loss)
    for name, param in self.model.params.items():
        dparam = grads[name]
        config = self.optim_configs[name]
        self.model.params[name], self.optim_configs[name] = self.update_rule(
            param, dparam, config=config)
def train(self):
    num_train = self.data['train_captions'].shape[0]
    iterations_per_epoch = max(num_train // self.batch_size, 1)
    num_iterations = self.num_epochs * iterations_per_epoch

    for t in range(num_iterations):
        self._step()

        if self.verbose and t % self.p_num == 0:
            print('(Iteration %d / %d) loss: %f'
                  % (t + 1, num_iterations, self.loss_history[-1]))

        # One full pass over the training data: bump the epoch counter and
        # decay the learning rate.
        epoch_end = (t + 1) % iterations_per_epoch == 0
        if epoch_end:
            self.epoch += 1
            for k in self.optim_params:
                self.optim_params[k]['learning_rate'] *= self.lr_decay

        first_it = (t == 0)
        last_it = (t == num_iterations - 1)
        if first_it or last_it or epoch_end:
            train_captions, train_image_features, _ = sample_coco_minibatch(
                self.data, batch_size=1000, split='train')
            train_acc = self.check_accuracy(train_image_features, train_captions)
            val_captions, val_image_features, _ = sample_coco_minibatch(
                self.data, batch_size=1000, split='val')
            val_acc = self.check_accuracy(val_image_features, val_captions)
            self.train_acc_history.append(train_acc)
            self.val_acc_history.append(val_acc)

            if self.verbose:
                print('(Epoch %d / %d) train acc: %f, val_acc: %f'
                      % (self.epoch, self.num_epochs, train_acc, val_acc))

            if val_acc > self.best_val_acc:
                self.best_val_acc = val_acc
                self.best_params = {}
                for k, v in self.model.params.items():
                    self.best_params[k] = v.copy()

    # At the end of training swap the best params into the model
    self.model.params = self.best_params
def overfit_lstm_captioning_model():
    """You should see a final loss less than 0.5."""
    np.random.seed(231)

    small_data = load_coco_data(max_train=50)

    small_lstm_model = CaptioningRNN(
        cell_type='lstm',
        word_to_idx=data['word_to_idx'],
        input_dim=data['train_features'].shape[1],
        hidden_dim=512,
        wordvec_dim=256,
        dtype=np.float32,
    )

    small_lstm_solver = CaptioningSolver(
        small_lstm_model, small_data,
        update_rule='adam',
        num_epochs=50,
        batch_size=25,
        optim_config={
            'learning_rate': 5e-3,
        },
        lr_decay=0.995,
        verbose=True, print_every=10,
    )

    small_lstm_solver.train()

    # Plot the training losses
    plt.plot(small_lstm_solver.loss_history)
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Training loss history')
    plt.show()

    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = small_lstm_model.sample(features)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
def train(self): """ Run optimization to train the model. """ num_train = self.data['train_captions'].shape[0] iterations_per_epoch = max(num_train / self.batch_size, 1) num_iterations = self.num_epochs * iterations_per_epoch for t in xrange(num_iterations): self._step() # Maybe print training loss if self.verbose and t % self.print_every == 0: print '(Iteration %d / %d) loss: %f' % (t + 1, num_iterations, self.loss_history[-1]) # At the end of every epoch, increment the epoch counter and decay the # learning rate. epoch_end = (t + 1) % iterations_per_epoch == 0 if epoch_end: self.epoch += 1 for k in self.optim_configs: self.optim_configs[k]['learning_rate'] *= self.lr_decay # Check train and val accuracy on the first iteration, the last # iteration, and at the end of each epoch. # TODO: Implement some logic to check Bleu on validation set periodically if t == 0 or epoch_end or t == num_iterations - 1: captions_train, features_train, _ = sample_coco_minibatch( self.data, batch_size=self.batch_size, split='train') self.train_acc_history.append( self.check_accuracy(features_train, captions_train)) captions_valid, features_valid, _ = sample_coco_minibatch( self.data, batch_size=self.batch_size, split='val') self.val_acc_history.append( self.check_accuracy(features_valid, captions_valid)) print '(Iteration %d / %d) train accuracy: %f, valid accuracy: %f' % ( t + 1, num_iterations, self.train_acc_history[-1], self.val_acc_history[-1])
def train(self): """ Run optimization to train the model. """ num_train = self.data['train_captions'].shape[0] iterations_per_epoch = max(num_train / self.batch_size, 1) num_iterations = self.num_epochs * iterations_per_epoch for t in xrange(num_iterations): self._step() # Maybe print training loss if self.verbose and t % self.print_every == 0: print '(Iteration %d / %d) loss: %f' % ( t + 1, num_iterations, self.loss_history[-1]) # At the end of every epoch, increment the epoch counter and decay the # learning rate. epoch_end = (t + 1) % iterations_per_epoch == 0 if epoch_end: self.epoch += 1 for k in self.optim_configs: self.optim_configs[k]['learning_rate'] *= self.lr_decay # Check train and val accuracy on the first iteration, the last # iteration, and at the end of each epoch. # TODO: Implement some logic to check Bleu on validation set periodically if t == 0 or epoch_end or t == num_iterations-1: captions_train, features_train, _ = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='train') self.train_acc_history.append(self.check_accuracy(features_train, captions_train)) captions_valid, features_valid, _ = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='val') self.val_acc_history.append(self.check_accuracy(features_valid, captions_valid)) print '(Iteration %d / %d) train accuracy: %f, valid accuracy: %f' % ( t + 1, num_iterations, self.train_acc_history[-1], self.val_acc_history[-1])
def _step(self):
    mini_batch = sample_coco_minibatch(self.data,
                                       batch_size=self.batch_size,
                                       split='train')
    captions, features, urls = mini_batch

    loss, grads = self.model.loss(features, captions)
    self.loss_history.append(loss)

    for p, w in self.model.params.items():
        dw = grads[p]
        config = self.optim_params[p]
        next_w, next_config = self.update_rule(w, dw, config)
        self.model.params[p] = next_w
        self.optim_params[p] = next_config
def demo(data, model):
    start = data['word_to_idx']['<START>']
    end = data['word_to_idx']['<END>']
    null = data['word_to_idx']['<NULL>']

    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(data, split=split, batch_size=2)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, data['idx_to_word'])

        sample_captions = model.sample(features, start, end, null)
        sample_captions = decode_captions(sample_captions, data['idx_to_word'])

        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            plt.imshow(image_from_url(url))
            plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
            plt.axis('off')
            plt.show()
def check_bleu(self, split, num_samples, batch_size=100, check_loss=False):
    """
    Check the BLEU score of the model on the provided data.

    Inputs:
    - split: String 'train' or 'val'
    - num_samples: Subsample the data and only test the model on
      num_samples datapoints.
    - batch_size: Split data into batches of this size to avoid using too
      much memory.
    - check_loss: If True, also compute and return the loss on the
      subsample.

    Returns:
    - bleu: Scalar giving the average unigram BLEU score of the captions
      generated by the model.
    """
    # Subsample the data
    minibatch = sample_coco_minibatch(self.data,
                                      batch_size=num_samples,
                                      split=split)
    captions, features, urls = minibatch
    if check_loss:
        loss, _ = self.model.loss(features, captions)
    gt_captions = decode_captions(captions, self.data['idx_to_word'])

    # Generate captions in batches
    num_batches = num_samples // batch_size
    if num_samples % batch_size != 0:
        num_batches += 1

    total_score = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = (i + 1) * batch_size
        sample_captions = self.model.sample(features[start:end])
        sample_captions = decode_captions(sample_captions, self.data['idx_to_word'])
        for gt_caption, sample_caption in zip(gt_captions[start:end], sample_captions):
            total_score += BLEU_score(gt_caption, sample_caption)

    if check_loss:
        return loss, total_score / num_samples
    return total_score / num_samples
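# A usage sketch for check_bleu, assuming a trained CaptioningSolver instance
# named `solver` (the name is illustrative):
#
#   val_bleu = solver.check_bleu('val', num_samples=1000)
#   val_loss, val_bleu = solver.check_bleu('val', num_samples=1000, check_loss=True)
#   print('val BLEU: %f' % val_bleu)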
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatch = sample_coco_minibatch(self.data, batch_size=self.batch_size, split="train") captions, features, urls = minibatch # Compute loss and gradient loss, grads = self.model.loss(features, captions) self.loss_history.append(loss) # Perform a parameter update for p, w in self.model.params.iteritems(): dw = grads[p] config = self.optim_configs[p] next_w, next_config = self.update_rule(w, dw, config) self.model.params[p] = next_w self.optim_configs[p] = next_config
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatchdata = sample_coco_minibatch(data=self.data, batch_size=self.batch_size, split='train') captions, features, urls = minibatchdata # Compute loss and gradient loss, grads = self.model.loss(features=features, captions=captions) self.loss_history.append(loss) # Perform a parameter update for k, v in self.model.params.items(): dw = grads[k] config = self.optim_configs[k] next_w, next_config = self.update_rule(w=v, dw=dw, config=config) self.model.params[k] = next_w self.optim_configs[k] = next_config
def train(self, data, num_epochs):
    """
    Train the model with TensorFlow.
    """
    # Make a minibatch of training data
    self.batch_size = 25
    loss_history = []
    print('Start Training')
    with tf.Session() as sess:
        num_train = data['train_captions'].shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = num_epochs * iterations_per_epoch
        sess.run(tf.global_variables_initializer())
        for t in range(num_iterations):
            minibatch = sample_coco_minibatch(data, self.batch_size, split='train')
            captions, features, urls = minibatch
            _, loss_t = sess.run(
                [self.train_op, self.loss],
                feed_dict={self.input_features: features,
                           self.input_captions: captions})
            loss_history.append(loss_t)
            if t % 1000 == 0:
                print('Iteration: %d, loss: %f' % (t, loss_t))
    return loss_history
def check_accuracy(self, model):
    """
    Check the unigram BLEU score of the model on 1000 examples from the
    train and val splits, and append the scores to the accuracy histories.

    Inputs:
    - model: CaptioningRNN model to evaluate.
    """
    BLEUscores = {}
    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(self.data, split=split, batch_size=1000)
        gt_captions, features, urls = minibatch
        gt_captions = decode_captions(gt_captions, self.data['idx_to_word'])

        sample_captions = model.sample(features)
        sample_captions = decode_captions(sample_captions, self.data['idx_to_word'])

        total_score = 0.0
        for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
            total_score += BLEU_score(gt_caption, sample_caption)
        BLEUscores[split] = total_score / len(sample_captions)

    self.train_acc_history.append(BLEUscores['train'])
    self.val_acc_history.append(BLEUscores['val'])
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatch = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='train') captions, features, urls = minibatch # Compute loss and gradient loss, grads = self.model.loss(features, captions) self.loss_history.append(loss) # Perform a parameter update for p, w in self.model.params.items(): # {权重名:权重矩阵} dw = grads[p] # 提取权重p的梯度 config = self.optim_configs[p] # 提取权重p的更新规则 next_w, next_config = self.update_rule(w, dw, config) # 调用optim的方法更新参数 self.model.params[p] = next_w # 存储已更新的权重 self.optim_configs[p] = next_config # 保存更新规则中的参数更新
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatch = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='train') captions, features, urls = minibatch # Compute loss and gradient loss, grads = self.model.loss(features, captions) # Perform a parameter update for p, w in self.model.params.items(): dw = grads[p] config = self.optim_configs[p] next_w, next_config = self.update_rule(w, dw, config) self.model.params[p] = next_w self.optim_configs[p] = next_config return loss
def _step(self): """ Make a single gradient update. This is called by train() and should not be called manually. """ # Make a minibatch of training data minibatch = sample_coco_minibatch(self.data, batch_size=self.batch_size, split='train') captions, features, urls = minibatch # Compute loss and gradient loss, grads = self.model.loss(features, captions) self.loss_history.append(loss) # Perform a parameter update for p, w in self.model.params.items(): dw = grads[p] ## 取出当前参数的梯度dw config = self.optim_configs[p] ## 取出当前参数的优化规则 next_w, next_config = self.update_rule( w, dw, config) ## 调用update_ruled对该参数进行优化,同时优化规则也会发生改变(比如学习率衰退了) self.model.params[p] = next_w ## 将更新后的参数输入的模型中 self.optim_configs[p] = next_config
def train(self, best=False, early_stop=False):
    """
    Run optimization to train the model.
    """
    best_loss = float('inf')
    patience = 0
    num_train = self.data['train_captions'].shape[0]
    iterations_per_epoch = int(max(num_train / self.batch_size, 1))
    num_iterations = self.num_epochs * iterations_per_epoch

    for t in range(num_iterations):
        minibatch_train = sample_coco_minibatch(self.data,
                                                batch_size=self.batch_size,
                                                split='train')
        minibatch_val = sample_coco_minibatch(self.data,
                                              batch_size=num_train,
                                              split='val')
        self._step(minibatch_train)

        # At the end of every epoch, increment the epoch counter and decay
        # the learning rate.
        epoch_end = (t + 1) % iterations_per_epoch == 0
        if epoch_end:
            self.epoch += 1
            for k in self.optim_configs:
                self.optim_configs[k]['learning_rate'] *= self.lr_decay

        # Check train and val accuracy on the first iteration, the last
        # iteration, and at the end of each epoch.
        # TODO: Implement some logic to check BLEU on the validation set
        # periodically.
        captions, features, urls = minibatch_val
        mean_loss = self.check_accuracy(features, captions,
                                        num_samples=None,
                                        batch_size=int(num_train * 0.2))
        if mean_loss > best_loss:
            patience += 1
        if best_loss > mean_loss:
            best_loss = mean_loss
            # Copy the parameters so later updates don't overwrite the best
            # snapshot through aliasing.
            self.best_params = {k: v.copy() for k, v in self.model.params.items()}
            patience = 0

        # Maybe print training loss
        if self.verbose and t % self.print_every == 0:
            if self.epoch < 1:
                mean_loss = 999.9
            print('(Iteration %d / %d) loss: %f val_mean_loss: %f best val loss: %f'
                  % (t + 1, num_iterations, self.loss_history[-1],
                     mean_loss, best_loss))
        if patience > 3 and early_stop:
            print('Early stopping loss: %f val_mean_loss: %f best val loss: %f'
                  % (self.loss_history[-1], mean_loss, best_loss))
            break

    # At the end of training swap the best params into the model
    if best:
        self.model.params = self.best_params
# ## Look at the data
# It is always a good idea to look at examples from the dataset before working with it.
#
# You can use the `sample_coco_minibatch` function from the file `cs231n/coco_utils.py` to sample minibatches of data from the data structure returned from `load_coco_data`. Run the following to sample a small minibatch of training data and show the images and their captions. Running it multiple times and looking at the results helps you to get a sense of the dataset.
#
# Note that we decode the captions using the `decode_captions` function and that we download the images on-the-fly using their Flickr URL, so **you must be connected to the internet to view images**.

# In[ ]:

# Sample a minibatch and show the images and captions
batch_size = 3

captions, features, urls = sample_coco_minibatch(data, batch_size=batch_size)
for i, (caption, url) in enumerate(zip(captions, urls)):
    plt.imshow(image_from_url(url))
    plt.axis('off')
    caption_str = decode_captions(caption, data['idx_to_word'])
    plt.title(caption_str)
    plt.show()

# # Recurrent Neural Networks
# As discussed in lecture, we will use recurrent neural network (RNN) language models for image captioning.
# The file `cs231n/rnn_layers.py` contains implementations of different layer types that are needed for recurrent
# neural networks, and the file `cs231n/classifiers/rnn.py` uses these layers to implement an image captioning model.
#
# We will first implement different types of RNN layers in `cs231n/rnn_layers.py`.
    lr_decay=0.995,
    verbose=True, print_every=10,
)

small_lstm_solver.train()

# Plot the training losses
plt.plot(small_lstm_solver.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Training loss history')
plt.show()

# Generate captions and save the figures to disk
for split in ['train', 'val']:
    minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
    gt_captions, features, urls = minibatch
    gt_captions = decode_captions(gt_captions, data['idx_to_word'])

    sample_captions = small_lstm_model.sample(features)
    sample_captions = decode_captions(sample_captions, data['idx_to_word'])

    idx = 0
    for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
        plt.imshow(image_from_url(url))
        plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
        plt.axis('off')
        info = 'image/lstm_' + split + str(idx) + '.jpg'
        idx += 1
        plt.savefig(info)
""" returns relative error """ return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y)))) data = load_coco_data(pca_features=True) # Print out all the keys and values from the data dictionary for k, v in data.items(): if type(v) == np.ndarray: print(k, type(v), v.shape, v.dtype) else: print(k, type(v), len(v)) batch_size = 3 captions, features, urls = sample_coco_minibatch(data, batch_size=batch_size) # show 3 image # for i, (caption, url) in enumerate(zip(captions, urls)): # plt.imshow(image_from_url(url)) # plt.axis('off') # caption_str = decode_captions(caption, data['idx_to_word']) # plt.title(caption_str) # plt.show() # DONE: Vanilla RNN: step forward N, D, H = 3, 10, 4 x = np.linspace(-0.4, 0.7, num=N * D).reshape(N, D) prev_h = np.linspace(-0.2, 0.5, num=N * H).reshape(N, H) Wx = np.linspace(-0.1, 0.9, num=D * H).reshape(D, H)
print(k, type(v), len(v))

# ## Look at the data
# It is always a good idea to look at examples from the dataset before working with it.
#
# You can use the `sample_coco_minibatch` function from the file `cs231n/coco_utils.py` to sample minibatches of data from the data structure returned from `load_coco_data`. Run the following to sample a small minibatch of training data and show the images and their captions. Running it multiple times and looking at the results helps you to get a sense of the dataset.
#
# Note that we decode the captions using the `decode_captions` function and that we download the images on-the-fly using their Flickr URL, so **you must be connected to the internet to view images**.

# In[ ]:

# Sample a minibatch and show the images and captions
batch_size = 3

captions, features, urls = sample_coco_minibatch(data, batch_size=batch_size)
for i, (caption, url) in enumerate(zip(captions, urls)):
    plt.imshow(image_from_url(url))
    plt.axis('off')
    caption_str = decode_captions(caption, data['idx_to_word'])
    plt.title(caption_str)
    plt.show()

# # Recurrent Neural Networks
# As discussed in lecture, we will use recurrent neural network (RNN) language models for image captioning. The file `cs231n/rnn_layers.py` contains implementations of different layer types that are needed for recurrent neural networks, and the file `cs231n/classifiers/rnn.py` uses these layers to implement an image captioning model.
#
# We will first implement different types of RNN layers in `cs231n/rnn_layers.py`.

# # Vanilla RNN: step forward
# Open the file `cs231n/rnn_layers.py`. This file implements the forward and backward passes for different types of layers that are commonly used in recurrent neural networks.
if num_layers == 1:
    ckpt_save_dir = './save-single-layer'
elif num_layers == 2:
    ckpt_save_dir = './save-double-layer'
else:
    print('check num_layers')
    exit()

Mode = 3  # 0: train, 1: test, 2: BLEU score, 3: meta file test

if Mode == 0:
    with tf.device('/cpu:0'):
        sess = tf.Session()

        minibatch = sample_coco_minibatch(small_data, split='train', batch_size=batch_size)
        captions, features, urls = minibatch
        _, T = captions.shape

        model = CaptioningRNN(word_to_idx=data['word_to_idx'],
                              input_dim=input_dim,
                              wordvec_dim=wordvec_dim,
                              hidden_dim=hidden_dim,
                              batch_size=batch_size,
                              seq_length=T - 1,
                              num_layers=num_layers)

        train = tf.train.AdamOptimizer(0.001).minimize(model.loss)

        num_batch = int(max_train / batch_size)
#     batch_size=25,
#     optim_config={
#         'learning_rate': 5e-3,
#     },
#     lr_decay=0.995,
#     verbose=True, print_every=10,
# )
#
# small_lstm_solver.train()

# # Plot the training losses
# plt.plot(small_lstm_solver.loss_history)
# plt.xlabel('Iteration')
# plt.ylabel('Loss')
# plt.title('Training loss history')
# plt.show()

for split in ['train', 'val']:
    minibatch = sample_coco_minibatch(small_data, split=split, batch_size=2)
    gt_captions, features, urls = minibatch
    gt_captions = decode_captions(gt_captions, data['idx_to_word'])

    sample_captions = small_lstm_model.sample(features)
    sample_captions = decode_captions(sample_captions, data['idx_to_word'])

    for gt_caption, sample_caption, url in zip(gt_captions, sample_captions, urls):
        plt.imshow(image_from_url(url))
        plt.title('%s\n%s\nGT:%s' % (split, sample_caption, gt_caption))
        plt.axis('off')
        plt.show()
hidden_dim = 512
wordvec_dim = 256
vocab_size = len(data['word_to_idx'])
# print(vocab_size)
batch_size = 25

iterations_per_epoch = max(num_train // batch_size, 1)
num_iterations = num_epochs * iterations_per_epoch

# params, model, sample = make_model(hidden_dim, wordvec_dim, vocab_size)
model = AttentionRnn(hidden_dim, wordvec_dim, vocab_size)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

for t in range(num_iterations):
    minibatch = sample_coco_minibatch(data, batch_size=batch_size, split='train')
    captions, features, urls = minibatch
    loss = model((features, captions))
    if t % 10 == 0:
        print(time.strftime('%X %x %Z'), t, num_iterations, loss.item())
    step(loss, optimizer)


def demo(data, model):
    start = data['word_to_idx']['<START>']
    end = data['word_to_idx']['<END>']
    null = data['word_to_idx']['<NULL>']

    for split in ['train', 'val']:
        minibatch = sample_coco_minibatch(data, split=split, batch_size=2)
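# The training loop above calls a step(loss, optimizer) helper that is not
# shown. A minimal sketch under the assumption that it performs one standard
# PyTorch optimizer update:
def step(loss, optimizer):
    optimizer.zero_grad()  # clear gradients accumulated from the last step
    loss.backward()        # backprop through the captioning loss
    optimizer.step()       # apply the Adam update to model.parameters()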