def test(data,
         model,
         sess,
         test_model,
         batch_size,
         idx_to_word,
         split='test',
         attention_visualization=True,
         save_sampled_captions=True):
    '''
    Args:
        - data: dictionary with the following keys:
            - features: Feature vectors of shape (5000, 196, 512)
            - file_names: Image file names of shape (5000, )
            - captions: Captions of shape (24210, 17)
            - image_idxs: Indices mapping each caption to its image, of shape (24210, )
            - features_to_captions: Mapping from features to captions (5000, 4~5)
        - split: 'train', 'val' or 'test'
        - attention_visualization: If True, visualize attention weights over the image for each sampled word (IPython notebook).
        - save_sampled_captions: If True, save sampled captions to a pkl file for computing BLEU scores.
    '''

    features = data['features']

    # build the sampling graph and restore the trained weights
    model.build()
    saver = tf.train.Saver(max_to_keep=40)
    saver.restore(sess, test_model)

    val_features = data['features']
    val_batch_size = batch_size
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / val_batch_size))
    all_gen_cap = np.ndarray((val_features.shape[0], 30))
    for i in range(n_iters_val):
        features_batch = val_features[i * val_batch_size:(i + 1) * val_batch_size]
        # assemble the matching batch of top-10 detection features, one hkl file per image
        val_detect_batch = np.empty((len(features_batch), 10, 4096))
        m = 0
        for j in range(i * val_batch_size, (i + 1) * val_batch_size):
            val_detect_single = hickle.load('./data_residue_detect/test/test_' + str(j) + '.features.hkl')
            val_detect_batch[m, :] = val_detect_single[-10:, :]
            m += 1
        _, _, _, _, gen_cap = model.generate(features_batch, val_detect_batch)
        all_gen_cap[i * val_batch_size:(i + 1) * val_batch_size] = gen_cap

    all_decoded = decode_captions(all_gen_cap, idx_to_word)
    save_pickle(all_decoded, "./data/test/test.candidate.captions.pkl")
    scores = evaluate(data_path='./data', split='test', get_scores=True)
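For orientation, a hedged sketch of how this free-standing test helper might be driven; CaptionGenerator, load_coco_data, the feature dimensions, and the checkpoint path are assumptions modeled on Example #10, not part of the snippet above.

import pickle
import tensorflow as tf

# Hypothetical driver, mirroring the setup in Example #10.
with open('./data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)
idx_to_word = {i: w for w, i in word_to_idx.items()}
test_data = load_coco_data(data_path='./data', split='test')

sess = tf.Session()
model = CaptionGenerator(sess, word_to_idx, dim_feature=[49, 2048],
                         dim_embed=512, dim_hidden=512, n_time_step=21)
test(test_data, model, sess,
     test_model='model_residue_cascade_attention_detect_10/model-10',
     batch_size=12, idx_to_word=idx_to_word, split='test')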
Example #2
    def _validation_score(self, e, sess, image_val, generated_captions, lr):
        file_names = load_pickle('/data1/junjiaot/data/val/val.file.names.pkl')
        print('number of validation features:', len(file_names))
        coco_attributes = load_pickle('/data1/junjiaot/data/val/val.multi_class_labels_abs.pkl')
        coco_attributes_onehot = load_pickle('/data1/junjiaot/data/val/val.multi_class_labels.pkl')  # (82783, 1113)
        image_dir = '/home/junjiaot/data_local/val2014'
        image_files = []
        for file in file_names:
            image_files.append(os.path.join(image_dir, file.split('/')[2]))
        image_files = np.array(image_files)

        start = 0
        end = self.batch_size
        n_iters_val = int(np.ceil(float(len(file_names))/self.batch_size))
        all_gen_cap = np.ndarray((len(file_names), 20))
        for i in range(n_iters_val):
            image_batch = []
            for image_file in image_files[start:end]:
                image = cv2.imread(image_file)
                # OpenCV loads images as BGR; convert to RGB before feeding the model
                rgb_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image_batch.append(sess.run(image_val, feed_dict={self.image: rgb_img}))
            attribute_onehot_batch = coco_attributes_onehot[start:end]
            feed_dict = {self.images: image_batch,
                         self.model.attributes_onehot: attribute_onehot_batch,
                         self.model.keep_prob: 1.0}
            gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
            all_gen_cap[i*self.batch_size:(i+1)*self.batch_size] = gen_cap[:]
            start = end
            end = end + self.batch_size

        all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
        save_pickle(all_decoded, "/data1/junjiaot/data/val/val.candidate.captions.pkl")
        print('Calculating scores...')
        scores = evaluate(data_path='/data1/junjiaot/data', split='val', get_scores=True)
        write_bleu(scores=scores, path=os.path.join(self.model_path, 'Score'), epoch=e, lr=lr)
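Every example in this collection slices arrays with the same i * batch_size:(i + 1) * batch_size pattern, relying on NumPy to return a short final batch. A self-contained sketch of that idiom (names are illustrative):

import numpy as np

def minibatches(arr, batch_size):
    """Yield consecutive batches; the final one may be smaller."""
    n_iters = int(np.ceil(float(len(arr)) / batch_size))
    for i in range(n_iters):
        yield arr[i * batch_size:(i + 1) * batch_size]

# 10 rows with batch_size=4 yield batches of shape (4,), (4,), (2,).
for batch in minibatches(np.arange(10), 4):
    print(batch.shape)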
Example #3
# The opening of this snippet was truncated at the source; these are the
# trailing keyword arguments of a CaptioningSolver(...) construction
# (cf. the full call in Example #9).
                          learning_rate=0.0025,
                          print_every=2000,
                          save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None,
                          model_path='./model/lstm',
                          test_model='./model/lstm3/model-18',
                          print_bleu=False,
                          log_path='./log/')

# In[7]:

solver.test(data, split='val')

# In[8]:

test = load_coco_data(data_path='./data', split='test')

# In[13]:

tf.get_variable_scope().reuse_variables()
solver.test(test, split='test')

# In[14]:

evaluate(data_path='./data', split='val')

# In[15]:

evaluate(data_path='./data', split='test')
Example #4
    def train(self):
        """
        Build the model graph and run the training loop, periodically logging
        sample captions and BLEU scores and checkpointing the parameters.
        """
        loss = self.model.build_model()

        with tf.variable_scope(tf.get_variable_scope()) as scope:
            with tf.name_scope('optimizer'):
                tf.get_variable_scope().reuse_variables()
                _, _, generated_captions = self.model.build_sampler(max_len=self.max_words_len)

                self.global_step = tf.Variable(0, name="global_step", trainable=False)
                lr = tf.train.exponential_decay(learning_rate=self.learning_rate, global_step=self.global_step,
                                                decay_steps=TrainingArg.lr_decay_steps,
                                                decay_rate=0.96, staircase=True, name='learn_rate')
                optimizer = self.optimizer(learning_rate=lr)
                grads = tf.gradients(loss, tf.trainable_variables())
                grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars, global_step=self.global_step)

        # summary op
        tf.summary.scalar('batch_loss', loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_and_vars:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradient', grad)

        summary_op = tf.summary.merge_all()

        config = tf.ConfigProto(allow_soft_placement=True)
        # config.gpu_options.per_process_gpu_memory_fraction=0.9
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            self.pre_mgr.set_tf_sess(sess)

            tf.global_variables_initializer().run()
            summary_writer = tf.summary.FileWriter(self.log_path, graph=tf.get_default_graph())
            saver = tf.train.Saver(max_to_keep=10)

            if self.pretrained_model is not None:
                print("Start training with pretrained Model..")
                saver.restore(sess, self.pretrained_model)

            curr_epoch = 0
            batchs = self.pre_mgr.fetch_batch(Const.caption_train_vector_path, self.data_dir,
                                              self.batch_size, self.n_epochs)

            for batch in batchs:
                caption_batch, image_batch, epoch = batch
                feed_dict = {self.model.features: image_batch, self.model.captions: caption_batch}
                _, l, step = sess.run([train_op, loss, self.global_step], feed_dict)

                if step % self.print_every == 0 or step == 1:
                    summary = sess.run(summary_op, feed_dict)
                    summary_writer.add_summary(summary, step)
                    print("\nTrain loss at epoch %d & step %d (mini-batch): %.5f" % (epoch + 1, step, l))
                    # ground_truths = captions[image_idxs == image_idxs_batch[0]]
                    ground_truths = np.array([caption_batch[0]])
                    decoded = self.pre_mgr.decode_captions(ground_truths, self.model.idx_to_word)
                    for j, gt in enumerate(decoded):
                        print("Ground truth %d: %s" % (j + 1, gt))
                    gen_caps = sess.run(generated_captions, feed_dict)
                    decoded = self.pre_mgr.decode_captions(gen_caps, self.model.idx_to_word)
                    print("Generated caption: %s\n" % decoded[0])

                print('{}, epoch:{} step: {},Current epoch loss: {}'.format(datetime.datetime.now().isoformat(), epoch + 1, step, l))

                # print out BLEU scores and write them to file
                if curr_epoch != epoch or step == 1 or step % self.print_every == 0:
                    curr_epoch = epoch
                    val_data_batchs = self.pre_mgr.fetch_val_batch(Const.val_vector_out_path, self.data_dir, self.batch_size)
                    gen_caps = []
                    i = 0
                    for val_batch in val_data_batchs:
                        val_caption, val_image = val_batch
                        # features_batch = val_features[i * self.batch_size:(i + 1) * self.batch_size]
                        feed_dict = {self.model.features: val_image}
                        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                        gen_caps.extend(gen_cap)
                        if not self.val_data_flag:
                            print('val batch loop {}'.format(i))
                            for item in val_caption:
                                self.org_decoded[i] = self.pre_mgr.decode_captions(np.array(item), self.model.idx_to_word,
                                                                                   ignore_start=True)
                                i += 1
                                # break
                    self.val_data_flag = True
                    gen_decoded = self.pre_mgr.decode_captions(np.array(gen_caps), self.model.idx_to_word)
                    for j in range(5):
                        print('val org sents: {}'.format(self.org_decoded[j]))
                        print('val gen sents: {}\n'.format(gen_decoded[j]))

                    scores = evaluate(gen_decoded, self.org_decoded, get_scores=True)
                    utils.write_bleu(scores=scores, path=self.model_path, epoch=epoch)

                    # save model's parameters
                    # if (e + 1) % self.save_every == 0:
                    saver.save(sess, os.path.join(self.model_path, 'model'), global_step=step)
                    print("model-%s saved." % (epoch + 1))
Example #5
def main(unused_argv):
    #matplotlib inline
    plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
    plt.rcParams['image.interpolation'] = 'nearest'
    plt.rcParams['image.cmap'] = 'gray'

    data = load_coco_data(
        data_path='/home/yifan/PythonProjects/im2txt-att/data', split='val')
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    # val settings
    split = 'train'
    batch_size = 128

    # basic settings
    test_path = '/home/yifan/PythonProjects/im2txt-att/model/lstm/model-test'
    model_path = '/home/yifan/PythonProjects/im2txt-att/model/lstm/model.ckpt'

    model = ShowAndTellModel(word_to_idx,
                             mode='eval',
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             alpha_c=1.0)

    # test
    features = data['features']

    # build a graph to sample captions
    alphas, betas, sampled_captions = model.build_model(max_len=20)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, model_path)
        features_batch, image_files = sample_coco_minibatch(data, batch_size)
        feed_dict = {model.features: features_batch}
        alps, bts, sam_cap = sess.run([alphas, betas, sampled_captions],
                                      feed_dict)
        decoded = decode_captions(sam_cap, model.idx_to_word)

        # attention visualization
        for n in range(10):
            print("Sampled Caption: %s" % decoded[n])

            # Plot original image
            img = ndimage.imread(image_files[n])
            plt.subplot(4, 5, 1)
            plt.imshow(img)
            plt.axis('off')

            # Plot images with attention weights
            words = decoded[n].split(" ")
            for t in range(len(words)):
                if t > 18:
                    break
                plt.subplot(4, 5, t + 2)
                plt.text(0,
                         1,
                         '%s(%.2f)' % (words[t], bts[n, t]),
                         color='black',
                         backgroundcolor='white',
                         fontsize=8)
                plt.imshow(img)
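                # The 196 attention weights form a 14x14 grid over the conv
                # feature map; upscaling by 16 yields a 224x224 overlay.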
                alp_curr = alps[n, t, :].reshape(14, 14)
                alp_img = skimage.transform.pyramid_expand(alp_curr,
                                                           upscale=16,
                                                           sigma=20)
                plt.imshow(alp_img, alpha=0.85)
                plt.axis('off')
            plt.show()

        # print out BLEU scores and file write
        all_sam_cap = np.ndarray((features.shape[0], 20))
        num_iter = int(np.ceil(float(features.shape[0]) / batch_size))
        for i in range(num_iter):
            features_batch = features[i * batch_size:(i + 1) * batch_size]
            feed_dict = {model.features: features_batch}
            all_sam_cap[i * batch_size:(i + 1) * batch_size] = sess.run(
                sampled_captions, feed_dict)
        all_decoded = decode_captions(all_sam_cap, model.idx_to_word)
        save_pickle(
            all_decoded,
            "/home/yifan/PythonProjects/im2txt-att/data/%s/%s.candidate.captions.pkl"
            % (split, split))
        scores = evaluate(
            data_path='/home/yifan/PythonProjects/im2txt-att/data',
            split='val',
            get_scores=True)
        write_bleu(scores=scores, path=test_path, epoch=0)
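sample_coco_minibatch above is a small utility from the same codebase; a minimal sketch of its presumed behavior (a random draw of feature rows with their file names), offered as an assumption:

import numpy as np

def sample_coco_minibatch_sketch(data, batch_size):
    # Presumed behavior: pick batch_size random images and return their
    # conv features together with the matching file names.
    idxs = np.random.choice(data['features'].shape[0], batch_size)
    return data['features'][idxs], data['file_names'][idxs]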
Example #6
    def train(self):
        # train/val dataset
        n_examples = self.data['captions'].shape[0]
        #n_examples = 5000
        n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
        features = self.data['features']
        captions = self.data['captions']
        image_idxs = self.data['image_idxs']
        val_features = self.val_data['features']
        n_iters_val = int(
            np.ceil(float(val_features.shape[0]) / self.batch_size))

        # build graphs for training model and sampling captions
        loss = self.model.build_model()
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            with tf.name_scope('optimizer'):
                tf.get_variable_scope().reuse_variables()
                _, _, generated_captions = self.model.build_sampler(max_len=20)
                optimizer = self.optimizer(learning_rate=self.learning_rate)
                grads = tf.gradients(loss, tf.trainable_variables())
                grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        # summary op
        tf.summary.scalar('batch_loss', loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_and_vars:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradient', grad)

        summary_op = tf.summary.merge_all()

        print "The number of epoch: %d" % self.n_epochs
        print "Data size: %d" % n_examples
        print "Batch size: %d" % self.batch_size
        print "Iterations per epoch: %d" % n_iters_per_epoch

        config = tf.ConfigProto(allow_soft_placement=True)
        #config.gpu_options.per_process_gpu_memory_fraction=0.9
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            tf.global_variables_initializer().run()
            summary_writer = tf.summary.FileWriter(
                self.log_path, graph=tf.get_default_graph())
            saver = tf.train.Saver(max_to_keep=20)

            if self.pretrained_model is not None:
                print "Start training with pretrained Model."
                saver.restore(sess, self.pretrained_model)

            prev_loss = -1
            curr_loss = 0
            start_t = time.time()

            for e in range(self.n_epochs):
                rand_idxs = np.random.permutation(n_examples)
                captions = captions[rand_idxs]
                image_idxs = image_idxs[rand_idxs]

                for i in range(n_iters_per_epoch):
                    captions_batch = captions[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                    image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                                  self.batch_size]
                    print(image_idxs_batch)
                    features_batch = features[image_idxs_batch]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.captions: captions_batch
                    }
                    _, l = sess.run([train_op, loss], feed_dict)
                    curr_loss += l

                    # write summary for tensorboard visualization
                    if i % 10 == 0:
                        summary = sess.run(summary_op, feed_dict)
                        summary_writer.add_summary(summary,
                                                   e * n_iters_per_epoch + i)

                    if (i + 1) % self.print_every == 0:
                        print(
                            "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f"
                            % (e + 1, i + 1, l))
                        ground_truths = captions[image_idxs ==
                                                 image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths,
                                                  self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print("Ground truth %d: %s" % (j + 1, gt))
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps,
                                                  self.model.idx_to_word)
                        print("Generated caption: %s\n" % decoded[0])

                print("Previous epoch loss: ", prev_loss)
                print("Current epoch loss: ", curr_loss)
                print("Elapsed time: ", time.time() - start_t)
                prev_loss = curr_loss
                curr_loss = 0

                # print out BLEU scores and file write
                if self.print_bleu:
                    all_gen_cap = np.ndarray((val_features.shape[0], 20))
                    for i in range(n_iters_val):
                        features_batch = val_features[i *
                                                      self.batch_size:(i + 1) *
                                                      self.batch_size]
                        feed_dict = {self.model.features: features_batch}
                        gen_cap = sess.run(generated_captions,
                                           feed_dict=feed_dict)
                        all_gen_cap[i * self.batch_size:(i + 1) *
                                    self.batch_size] = gen_cap

                    all_decoded = decode_captions(all_gen_cap,
                                                  self.model.idx_to_word)
                    save_pickle(all_decoded,
                                "./data/val/val.candidate.captions.pkl")
                    scores = evaluate(data_path='./data',
                                      split='val',
                                      get_scores=True)
                    write_bleu(scores=scores, path=self.model_path, epoch=e)

                # save model's parameters
                if (e + 1) % self.save_every == 0:
                    saver.save(sess,
                               os.path.join(self.model_path, 'model'),
                               global_step=e + 1)
                    print "model-%s saved." % (e + 1)
Example #7
    def train(self):
        # train/val dataset
        # Changed this because I keep less features than captions, see prepro
        #path = '/media/zaheer/Data/Image_Text_Datasets/IU_Xray/latest/Two_Images/word/Sample1/'
        n_examples = self.data['captions'].shape[0]
        #n_examples = self.data['features'].shape[0]
        n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
        features = self.data['features']

        captions = self.data['captions']
        image_idxs = self.data['image_idxs']
        val_features = self.val_data['features']
        val_captions = self.val_data['captions']
        n_iters_val = int(
            np.ceil(float(val_features.shape[0]) / self.batch_size))

        # build graphs for training model and sampling captions
        # sharing one variable scope avoids duplicate-variable errors between them
        with tf.variable_scope(tf.get_variable_scope()):
            loss = self.model.build_model()
            tf.get_variable_scope().reuse_variables()
            alph, bts, generated_captions = self.model.build_sampler(
                split='val', max_len=101)
            val_loss = self.model.valid_loss()

        # train op
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            early_optimizer = self.optimizer(learning_rate=self.learning_rate)
            mid_optimizer = self.optimizer(learning_rate=self.learning_rate /
                                           10)
            late_optimizer = self.optimizer(learning_rate=self.learning_rate /
                                            100)
            grads = tf.gradients(loss, tf.trainable_variables())
            grads_and_vars = list(zip(grads, tf.trainable_variables()))
            train_op = early_optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)
            train_op0 = mid_optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)
            train_op1 = late_optimizer.apply_gradients(
                grads_and_vars=grads_and_vars)

        # summary op
        val_loss_sum = tf.summary.scalar('val_batch_loss', val_loss)
        train_loss_sum = tf.summary.scalar('batch_loss', loss)

        print("The number of epoch: %d" % self.n_epochs)
        print("Data size: %d" % n_examples)
        print("Batch size: %d" % self.batch_size)
        print("Iterations per epoch: %d" % n_iters_per_epoch)

        config = tf.ConfigProto(log_device_placement=True)
        run_options = tf.RunOptions()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            tf.global_variables_initializer().run()
            summary_writer = tf.summary.FileWriter(
                self.log_path + '/train/', graph=tf.get_default_graph())
            val_summary_writer = tf.summary.FileWriter(
                self.log_path + '/val/', graph=tf.get_default_graph())
            saver = tf.train.Saver(max_to_keep=100)

            if self.pretrained_model is not None:
                print("Start training with pretrained Model..")
                saver.restore(sess, self.pretrained_model)

            prev_loss = -1
            curr_loss = 0
            start_t = time.time()

            for e in range(self.n_epochs):
                tik_epoch = time.time()
                print("epoch:", e)
                rand_idxs = np.random.permutation(n_examples)
                captions = captions[rand_idxs]
                image_idxs = image_idxs[rand_idxs]

                for i in range(n_iters_per_epoch):
                    tik_batch = time.time()
                    captions_batch = captions[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                    image_idxs_batch = image_idxs[i * self.batch_size:(i + 1) *
                                                  self.batch_size]
                    features_batch = features[image_idxs_batch]
                    feed_dict = {
                        self.model.features: features_batch,
                        self.model.captions: captions_batch
                    }
                    print(i)
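                    # Stage the learning rate by epoch: the base-rate optimizer
                    # for the first 30 epochs, then the lr/10 optimizer
                    # afterwards (the lr/100 stage is currently disabled).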
                    if (e <= 30):
                        _, l = sess.run([train_op, loss],
                                        feed_dict,
                                        options=run_options)
                    # elif (e<=60 and e>30):
                    #    _, l = sess.run([train_op0, loss], feed_dict)
                    else:
                        _, l = sess.run([train_op0, loss],
                                        feed_dict,
                                        options=run_options)
                    curr_loss += l
                    #print('Time to execute a batch:',time.time()-tik_batch)
                    # write summary for tensorboard visualization
                    if i % 10 == 0:
                        summary = sess.run(train_loss_sum, feed_dict)
                        summary_writer.add_summary(summary,
                                                   e * n_iters_per_epoch + i)

                    if ((i + 1) % self.print_every == 0):
                        print(
                            "\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f"
                            % (e + 1, i + 1, l))
                        ground_truths = captions[image_idxs ==
                                                 image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths,
                                                  self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print("Ground truth %d: %s" % (j + 1, gt))
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps,
                                                  self.model.idx_to_word)
                        print("Generated caption: %s\n" % decoded[0])
                #print('Time to execute an epoch:', time.time() - tik_epoch)
                print("Previous epoch loss: ", prev_loss)
                print("Current epoch loss: ", curr_loss)
                print("Elapsed time: ", time.time() - start_t)
                prev_loss = curr_loss
                curr_loss = 0
                curr_val_loss = 0.0
                # print out BLEU scores and file write
                if self.print_bleu:
                    all_gen_cap = np.ndarray((val_features.shape[0], 101))
                    for i in range(n_iters_val):
                        val_features_batch = val_features[i *
                                                          self.batch_size:(i +
                                                                           1) *
                                                          self.batch_size]
                        val_captions_batch = val_captions[i *
                                                          self.batch_size:(i +
                                                                           1) *
                                                          self.batch_size]
                        feed_dict = {
                            self.model.features: val_features_batch,
                            self.model.captions: val_captions_batch
                        }
                        gen_cap, val_l = sess.run(
                            [generated_captions, val_loss],
                            feed_dict=feed_dict)
                        all_gen_cap[i * self.batch_size:(i + 1) *
                                    self.batch_size] = gen_cap
                        curr_val_loss += val_l
                        if i % 10 == 0:
                            val_summary = sess.run(val_loss_sum, feed_dict)
                            val_summary_writer.add_summary(
                                val_summary, e * n_iters_val + i)
                    print(curr_val_loss)
                    all_decoded = decode_captions(all_gen_cap,
                                                  self.model.idx_to_word)
                    GroundTruth_decoded = decode_captions(
                        self.val_data['captions'], self.model.idx_to_word)
                    print("Validation Sample Generated Caption")
                    sample_gen = random.randint(0, val_captions.shape[0] - 1)
                    print("Ground truth: %s" %
                          (GroundTruth_decoded[sample_gen]))
                    print("Generated caption: %s\n" % all_decoded[sample_gen])
                    save_pickle(all_decoded,
                                self.path + "test/test.candidate.captions.pkl")
                    scores = evaluate(data_path=self.path,
                                      split='test',
                                      get_scores=True)
                    write_bleu(scores=scores, path=self.path, epoch=e)

                # save model's parameters
                if (e + 1) % self.save_every == 0:
                    saver.save(sess,
                               os.path.join(self.model_path, 'model'),
                               global_step=e + 1)
                    print("model-%s saved." % (e + 1))

            #self.test(self.test_data,alphas=alph, betas=bts, sampled_captions=generated_captions)
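        # Clear the default graph so train() can be run again (or a test graph
        # built) without variable-name collisions.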
        tf.reset_default_graph()
Example #8
    def test(self,
             data,
             split='train',
             attention_visualization=True,
             save_sampled_captions=False):
        '''
        Args:
            - data: dictionary with the following keys:
                - features: Feature vectors of shape (5000, 196, 512)
                - file_names: Image file names of shape (5000, )
                - captions: Captions of shape (24210, 17)
                - image_idxs: Indices mapping each caption to its image, of shape (24210, )
                - features_to_captions: Mapping from features to captions (5000, 4~5)
            - split: 'train', 'val' or 'test'
            - attention_visualization: If True, visualize attention weights over the image for each sampled word (IPython notebook).
            - save_sampled_captions: If True, save sampled captions to a pkl file for computing BLEU scores.
        '''

        features = data['features']

        # build a graph to sample captions
        alphas, betas, sampled_captions = self.model.build_sampler(
            max_len=25)  # (N, max_len, L), (N, max_len)

        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            saver = tf.train.Saver()
            saver.restore(sess, self.test_model)
            features_batch, image_files = data['features'], data['file_names']
            feed_dict = {self.model.features: features_batch}
            alps, bts, sam_cap = sess.run(
                [alphas, betas, sampled_captions],
                feed_dict)  # (N, max_len, L), (N, max_len)
            decoded = decode_captions(sam_cap, self.model.idx_to_word)
            save_pickle(
                decoded,
                r".\image_data_to_be_labeled\Object_feature\our_data\train\val.candidate.captions.pkl"
            )

            if attention_visualization:
                for n in range(len(image_files)):
                    print("Sampled Caption: %s" % decoded[n])

                    # Plot original image
                    img = ndimage.imread(image_files[n])
                    plt.subplot(4, 5, 1)
                    plt.imshow(img)
                    plt.axis('off')

                    # Plot images with attention weights
                    words = decoded[n].split(" ")
                    for t in range(len(words)):
                        if t > 18:
                            break
                        plt.subplot(4, 5, t + 2)
                        plt.text(0,
                                 1,
                                 '%s(%.2f)' % (words[t], bts[n, t]),
                                 color='black',
                                 backgroundcolor='white',
                                 fontsize=8)
                        plt.imshow(img)
                        alp_curr = alps[n, t, :196].reshape(14, 14)
                        alp_img = skimage.transform.pyramid_expand(alp_curr,
                                                                   upscale=16,
                                                                   sigma=20)
                        plt.imshow(alp_img, alpha=0.85)
                        plt.axis('off')
                    plt.suptitle(decoded[n])
                    # use splitext to drop the extension; rstrip('.jpg') would
                    # strip a character set, not the suffix
                    savename = 'caption_' + os.path.splitext(
                        os.path.basename(image_files[n]))[0] + '.png'
                    plt.savefig(
                        os.path.join(
                            r'C:\Users\song\Desktop\511project\show-attend-and-tell-tensorflow\image_data_to_be_labeled\Object_feature\results',
                            savename))
                    plt.close('all')
                    # plt.show()

            ref_path = r'.\image_data_to_be_labeled\Object_feature\our_data\train\train.references.pkl'
            cand_path = r'.\image_data_to_be_labeled\Object_feature\our_data\train\val.candidate.captions.pkl'

            scores = evaluate(ref_path, cand_path, get_scores=True)
            #write_bleu(scores=scores, path=self.model_path, epoch=e)

            if save_sampled_captions:
                all_sam_cap = np.ndarray((features.shape[0], 20))
                num_iter = int(
                    np.ceil(float(features.shape[0]) / self.batch_size))
                for i in range(num_iter):
                    features_batch = features[i * self.batch_size:(i + 1) *
                                              self.batch_size]
                    feed_dict = {self.model.features: features_batch}
                    all_sam_cap[i * self.batch_size:(i + 1) *
                                self.batch_size] = sess.run(
                                    sampled_captions, feed_dict)
                all_decoded = decode_captions(all_sam_cap,
                                              self.model.idx_to_word)
                save_pickle(
                    all_decoded,
                    "./data/%s/%s.candidate.captions.pkl" % (split, split))
Example #9
# The opening of this snippet was truncated at the source; these orphaned
# keyword arguments are evidently the tail of a CaptionGenerator(...)
# construction (cf. the full constructor in Example #10).
                         alpha_c=1.0,
                         selector=True,
                         dropout=True)

solver = CaptioningSolver(model,
                          data,
                          data,
                          n_epochs=20,
                          batch_size=128,
                          update_rule='adam',
                          learning_rate=0.0025,
                          print_every=2000,
                          save_every=1,
                          image_path='./image/val2014_resized',
                          pretrained_model=None,
                          model_path='./model/preview_model/',
                          test_model='./model/preview_model/model-20',
                          print_bleu=False,
                          log_path='./log/')

#solver.test(data, split='val')
#test = load_coco_data(data_path='./data/coco_data', split='test')
#tf.get_variable_scope().reuse_variables()
solver.test(data, split='test')
#evaluate(data_path='./data/coco_data', split='val')
evaluate(data_path='./data/coco_data', split='test')

Example #10
def main():
    batch_size = 32
    val_batch_size = 12
    save_every = 1
    #pretrained_model = None
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    model_path = 'model_residue_cascade_attention_detect_10/'
    # load val dataset to print out bleu scores every epoch
    sess = tf.Session()
    model = CaptionGenerator(sess,
                             word_to_idx,
                             dim_feature=[49, 2048],
                             dim_embed=512,
                             dim_hidden=512,
                             n_time_step=21,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    n_examples = 117208
    val_data = load_coco_data(data_path='./data', split='val')
    n_iters_per_epoch = int(np.ceil(float(n_examples) / batch_size))
    with open('./data/train/train.captions.pkl', 'rb') as f:
        captions = pickle.load(f)
    with open('./data/train/train.image.idxs.pkl', 'rb') as f:
        image_idxs = pickle.load(f)
    print(image_idxs)
    val_features = val_data['features']
    print(val_features.shape[0])
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / val_batch_size))
    model.build()
    saver = tf.train.Saver()
    print('start pre-training')
    for epoch in range(1, 10 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
        for step in range(1, n_iters_per_epoch + 1):
            # step starts at 1, so slice from (step - 1) to avoid skipping the first batch
            captions_batch = captions[(step - 1) * batch_size:step * batch_size]
            image_idxs_batch = image_idxs[(step - 1) * batch_size:step * batch_size]
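            # Conv features are stored one hickle file per image; assemble the
            # batch by loading the file for each image id in this batch.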
            features_batch = np.empty((batch_size, 49, 2048))
            j = 0
            for i in image_idxs_batch:
                features_single = hickle.load('./data_residue_single/train/' +
                                              'train_' + str(i) +
                                              '.features.hkl')
                features_batch[j, :] = features_single
                j = j + 1

            features_detect_batch = np.empty((batch_size, 10, 4096))
            j = 0
            for i in image_idxs_batch:
                features_detect_single = hickle.load(
                    './data_residue_detect/train/' + 'train_' + str(i) +
                    '.features.hkl')
                features_detect_single = features_detect_single[-10:, :]
                features_detect_batch[j, :] = features_detect_single
                j = j + 1

            if captions_batch.shape[0] == batch_size:
                model.pre_train_batch(features_batch, features_detect_batch,
                                      captions_batch)
            if step % 10 == 0:
                print('epoch', epoch)
                print('step', step)
            if step % 512 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * val_batch_size:(i + 1) *
                                                  val_batch_size]
                    val_detect_batch = np.empty(
                        (len(features_batch), 10, 4096))
                    m = 0
                    for j in range(i * val_batch_size,
                                   (i + 1) * val_batch_size):
                        val_detect_single = hickle.load(
                            './data_residue_detect/val/' + 'val_' + str(j) +
                            '.features.hkl')
                        val_detect_single = val_detect_single[-10:, :]
                        val_detect_batch[m, :] = val_detect_single
                        m = m + 1
                    _, _, _, _, gen_cap = model.generate(
                        features_batch, val_detect_batch)
                    all_gen_cap[i * val_batch_size:(i + 1) *
                                val_batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data',
                                  split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                print "generative captions:%s\n" % all_decoded[0]

        if epoch % save_every == 0:
            saver.save(sess,
                       os.path.join(model_path, 'model'),
                       global_step=epoch)
            print "model-%s saved." % (epoch)
    print 'start reinforcement learning!'

    # note: range(1, 0 + 1) is empty, so this reinforcement-learning loop is
    # disabled as written; raise the upper bound to enable it
    for epoch in range(1, 0 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
        for step in range(1, n_iters_per_epoch + 1):
            captions_batch = captions[(step - 1) * batch_size:step * batch_size]
            image_idxs_batch = image_idxs[(step - 1) * batch_size:step * batch_size]
            features_batch = features[image_idxs_batch]
            if captions_batch.shape[0] == batch_size:
                #gen_cap = model.generate(features_batch)
                #decoded_cap = decode_captions(gen_cap, model.idx_to_word)
                #decoded_reference = decode_captions(captions_batch, model.idx_to_word)
                #scores = evaluate_part(candidate = decoded_cap, split = 'train', idx = image_idxs_batch, get_scores=True)
                #reward = (0.5*scores['Bleu_1']  + 0.5*scores['Bleu_2'] + scores['Bleu_3'] + scores['Bleu_4'])/3
                #print reward
                #reward = 1
                t = model.train_batch(features_batch, captions_batch)
            if step % 10 == 0:
                print('epoch', epoch)
                print('step', step)
                print('time', t)
            if step % 1024 == 0:
                ground_truths = captions[image_idxs == image_idxs_batch[0]]
                decoded = decode_captions(ground_truths, model.idx_to_word)
                for j, gt in enumerate(decoded):
                    print("Ground truth %d: %s" % (j + 1, gt))
                gen_caps = model.generate(features_batch)
                decoded = decode_captions(gen_caps, model.idx_to_word)
                print("Generated caption: %s\n" % decoded[0])

            if step % 1024 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * batch_size:(i + 1) * batch_size]
                    gen_cap = model.generate(features_batch)
                    all_gen_cap[i * batch_size:(i + 1) * batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data',
                                  split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                #print "generative captions:%s\n"%all_decoded[0]
        if epoch % save_every == 0:
            saver.save(sess,
                       os.path.join(model_path, 'reinforcemodel'),
                       global_step=epoch)
            print "model-%s saved." % (epoch)