Example #1
# Imports assumed by these classes (project-local modules such as the
# ImageLoader in ./utils, the img2poem package, the NN helper, and the VGG
# symbol builder belong to the original repo and are not reproduced here):
import copy
import json
import os
import pickle

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import mxnet as mx
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications.vgg19 import VGG19
from keras.models import Model
from pycocoevalcap.eval import COCOEvalCap  # path may differ in the original repo
from tqdm import tqdm

class BaseModel(object):
    def __init__(self, config):
        self.config = config
        self.is_train = (config.phase == 'train')
        self.train_cnn = self.is_train and config.train_cnn
        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)

        self.build()

    def get_imagefeatures(self, image_files, batch_size):
        return self.image_loader.extract_features_vgg19(
            self.trained_model, image_files, batch_size)

    def build(self):
        # use pretrained vgg model to extract image features
        net = VGG19(weights='imagenet')
        self.trained_model = Model(inputs=net.input,
                                   outputs=net.get_layer('fc2').output)

        self.object_model = self.get_mod()
        self.object_model.load_params('./img2poem/model/object.params')

        self.sentiment_model = self.get_mod(sym=img2poem.symbol_sentiment.get_sym(),
                                            img_len=227)
        self.sentiment_model.load_params('./img2poem/model/Sentiment.params')

        self.scene_model = self.get_mod()
        self.scene_model.load_params('./img2poem/model/scene.params')
        #self.sentiment_model = Model(input= net.input, output= net.get_layer('block3_conv1').output)

    def get_mod(self, output_name='relu7_output', sym=None, img_len=224):
        if sym is None:
            vgg = VGG()
            sym = vgg.get_symbol(num_classes=1000,
                                 blocks=[(2, 64),
                                         (2, 128),
                                         (3, 256),
                                         (3, 512),
                                         (3, 512)])
            internals = sym.get_internals()
            sym = internals[output_name]
        ctx = mx.cpu()
        mod = mx.module.Module(
                context=ctx,
                symbol=sym,
                data_names=("data",),
                label_names=())

        # Bind with the caller's image size; img_len was previously ignored,
        # so the 227x227 sentiment model was bound at 224x224.
        mod.bind(data_shapes=[("data", (1, 3, img_len, img_len))],
                 for_training=False)

        return mod

    def train(self, sess, train_data):
        raise NotImplementedError()

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """ Evaluate the model using the COCO val2014 data. """
        print("Evaluating the model ...")
        config = self.config

        results = []
        if not os.path.exists(config.eval_result_dir):
            os.mkdir(config.eval_result_dir)

        # Generate the captions for the images
        idx = 0
        for k in tqdm(list(range(eval_data.num_batches)), desc='batch'):
            batch = eval_data.next_batch()
            #caption_data = self.beam_search(sess, batch, vocabulary)
            images = self.image_loader.load_images(batch)
            caption_data, scores = sess.run([self.predictions, self.probs], feed_dict={self.images: images})
            fake_cnt = 0 if k<eval_data.num_batches-1 \
                         else eval_data.fake_count
            for l in range(eval_data.batch_size-fake_cnt):
                # self.predictions returns word indexes; map them back to
                # words through the vocabulary.
                word_idxs = caption_data[l]
                # get_sentence builds the sentence up to the end delimiter '.'.
                caption = str(vocabulary.get_sentence(word_idxs))
                results.append({'image_id': int(eval_data.image_ids[idx]),
                                'caption': caption})
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[l]
                    image_name = os.path.splitext(os.path.basename(image_file))[0]
                    img = mpimg.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    plt.savefig(os.path.join(config.eval_result_dir,
                                             image_name+'_result.jpg'))

        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """ Test the model using any given images. """
        print("Testing the model ...")
        config = self.config

        if not os.path.exists(config.test_result_dir):
            os.mkdir(config.test_result_dir)

        captions = []
        scores = []

        # Generate the captions for the images
        for k in tqdm(list(range(test_data.num_batches)), desc='batch'):
            batch = test_data.next_batch()
            images = self.image_loader.load_images(batch)
            caption_data, scores_data = sess.run(
                [self.predictions, self.probs], feed_dict={self.images: images})

            fake_cnt = 0 if k<test_data.num_batches-1 \
                         else test_data.fake_count
            for l in range(test_data.batch_size-fake_cnt):
                # self.predictions returns word indexes; map them back to
                # words through the vocabulary.
                word_idxs = caption_data[l]
                # get_sentence builds the sentence up to the end delimiter '.'.
                caption = vocabulary.get_sentence(word_idxs)
                print(caption)
                captions.append(caption)
                scores.append(scores_data[l])

                # Save the result in an image file
                image_file = batch[l]
                image_name = os.path.splitext(os.path.basename(image_file))[0]
                img = mpimg.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                plt.savefig(os.path.join(config.test_result_dir,
                                         image_name+'_result.jpg'))

        # Save the captions to a file
        results = pd.DataFrame({'image_files':test_data.image_files,
                                'caption':captions,
                                'prob':scores})
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def save(self):
        """ Save the model. """
        config = self.config
        data = {v.name: v.eval() for v in tf.global_variables()}
        save_path = os.path.join(config.save_dir, str(self.global_step.eval()))

        print((" Saving the model to %s..." % (save_path+".npy")))
        np.save(save_path, data)
        config_ = copy.copy(config)
        config_.global_step = self.global_step.eval()
        with open(os.path.join(config.save_dir, "config.pickle"), "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """ Load the model. """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            global_step = config.global_step
            save_path = os.path.join(config.save_dir,
                                     str(global_step) + ".npy")

        print("Loading the model from %s..." %save_path)
        data_dict = np.load(save_path).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." %count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """ Load a pretrained CNN model. """
        print("All variables present...")
        for var in tf.all_variables():
            print(var)
        with tf.variable_scope('conv1_1',reuse = True):
            kernel = tf.get_variable('conv1_1_W')

        print("Loading the CNN from %s..." %data_path)
        data_dict = np.load(data_path,encoding='latin1')
        count = 0
        for param_name in tqdm(data_dict.keys()):
            op_name = param_name[:-2]
            print(param_name)
            with tf.variable_scope(op_name, reuse = True):
                try:
                    var = tf.get_variable(param_name)
                    session.run(var.assign(data_dict[param_name]))
                    count += 1
                except ValueError:
                    print("No such variable: %s" % param_name)

        print("%d tensors loaded." %count)
Example #2
# (Imports as in Example #1.)
class DataTestLoader():
    def __init__(self, config, batch_size, end_token=0):
        self.config = config
        self.batch_size = batch_size
        self.end_token = end_token
        self.image_batch = None
        self.feature_batch = None
        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        net = VGG19(weights='imagenet')
        self.trained_model = Model(inputs=net.input,
                                   outputs=net.get_layer('fc2').output)

    def get_imagefeatures_vgg19(self, image_files, feature_files):
        #print("to extract features...")
        return self.image_loader.extract_features_vgg19(
            self.trained_model, image_files, feature_files,
            self.batch_size)  #extract image features using vgg19

    def next_batch(self):
        # feat_file and conv are returned below, so initialize them for the
        # no-image path (previously a NameError when image_batch was None).
        imgs = None
        feat_file = None
        conv = None

        if self.image_batch is not None:
            imgs = self.image_batch[self.pointer]
            feat_file = self.feature_batch[self.pointer]
            conv = np.array(self.get_imagefeatures_vgg19(imgs, feat_file))
        else:
            print("no image files")

        self.pointer = (self.pointer + 1) % self.num_batch

        return imgs, feat_file, conv

    def reset_pointer(self):
        self.pointer = 0

    def create_batches(self, with_image=True):

        self.pointer = 0
        config = self.config

        if with_image:
            data = pd.read_csv(config.test_temp_file)
            image_files = []
            feature_files = []
            for _, img, feat in data.values:
                image_files.append(img)
                feature_files.append(feat)
            #print("len image files: " + str(len(image_files)))
            #print("len feature files: " + str(len(feature_files)))
            self.num_batch = int(len(image_files) / self.batch_size)
            #print("num batch" + str(self.num_batch))

            image_files = image_files[:self.num_batch * self.batch_size]
            feature_files = feature_files[:self.num_batch * self.batch_size]

            #print("len image files: " + str(len(image_files)))
            #print("len feature files: " + str(len(feature_files)))

            self.image_batch = np.array(
                np.split(np.array(image_files), self.num_batch, 0))
            self.feature_batch = np.array(
                np.split(np.array(feature_files), self.num_batch, 0))

        else:
            # No image data: num_batch stays unset, so next_batch() must not
            # be called in this mode.
            self.image_batch = None
            self.feature_batch = None

    def get_sample_features(self):
        # `config` was unbound here; use the instance's config.
        data = np.load(self.config.temp_sample_image_file,
                       allow_pickle=True).item()
        imgs = data['images']
        features = data['features']

        return imgs, features

    def reset_image_pointer(self):
        self.image_pointer = 0
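
# Standalone sketch (not from the original source) of the batching scheme the
# loaders share: drop the remainder, split into equal batches, and cycle the
# pointer modulo num_batch. File names here are toy values.
import numpy as np
files = np.array(['img_%d.jpg' % i for i in range(10)])
batch_size = 3
num_batch = len(files) // batch_size       # 3 full batches; 1 leftover dropped
batches = np.split(files[:num_batch * batch_size], num_batch, 0)
pointer = 0
for step in range(5):                      # wraps back to batch 0 after batch 2
    batch = batches[pointer]
    pointer = (pointer + 1) % num_batch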
Example #3
# (Imports as in Example #1; `shuffle` is assumed to be sklearn.utils.shuffle.)
class DisDataloader():
    def __init__(self, config, batch_size, seq_length):
        self.config = config
        self.batch_size = batch_size
        self.sentences = np.array([])
        self.labels = np.array([])
        self.seq_length = seq_length
        # Initialize so next_batch() works even before load_train_data()
        # has populated the image batches.
        self.image_batch = None
        self.feature_batch = None

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        net = VGG19(weights='imagenet')
        self.trained_model = Model(inputs=net.input,
                                   outputs=net.get_layer('fc2').output)

    def get_imagefeatures_vgg19(self, image_files, feature_files):
        #print("to extract features...")
        return self.image_loader.extract_features_vgg19(
            self.trained_model, image_files, feature_files, self.batch_size)

    def load_train_data(self, with_image):
        # Load data (pos: oracle/real samples, neg: generated samples).
        data = np.load(self.config.temp_generate_file,
                       allow_pickle=True).item()
        # data holds feature_files, image_files, real_samples, generated_samples.
        positive_examples = data['real_samples']
        negative_examples = data['generated_samples']
        feature_files = data['feature_files']
        image_files = data['image_files']

        if with_image:  # same order as positive and negative examples
            feature_files = np.concatenate([feature_files, feature_files], 0)
            image_files = np.concatenate([image_files, image_files], 0)

        # Generate labels
        positive_labels = [[0, 1] for _ in positive_examples]
        negative_labels = [[1, 0] for _ in negative_examples]
        self.labels = np.concatenate([positive_labels, negative_labels], 0)

        # Split batches

        self.sentences = np.concatenate([positive_examples, negative_examples],
                                        0)
        self.num_batch = int(len(self.labels) / self.batch_size)
        self.sentences = self.sentences[:self.num_batch * self.batch_size]
        self.labels = self.labels[:self.num_batch * self.batch_size]
        if with_image:

            feature_files = feature_files[:self.num_batch * self.batch_size]
            image_files = image_files[:self.num_batch * self.batch_size]
            self.labels, self.sentences, feature_files, image_files = shuffle(
                self.labels, self.sentences, feature_files, image_files)

            self.feature_batch = np.split(np.array(feature_files),
                                          self.num_batch, 0)
            self.image_batch = np.split(np.array(image_files), self.num_batch,
                                        0)
        else:
            self.labels, self.sentences = shuffle(self.labels, self.sentences)

        self.sentences_batches = np.split(self.sentences, self.num_batch, 0)
        self.labels_batches = np.split(self.labels, self.num_batch, 0)

        self.pointer = 0

    def print_sample(self, array):
        # Debug helper (disabled): re-enable the loop below to dump samples.
        return
        # for i in range(10):
        #     print(str(array[i]))

    def next_batch(self):
        sent = self.sentences_batches[self.pointer]
        lab = self.labels_batches[self.pointer]
        imgs = None
        features = None
        if self.image_batch:
            imgs = self.image_batch[self.pointer]
            feature_files = self.feature_batch[self.pointer]
            features = self.get_imagefeatures_vgg19(imgs, feature_files)
        else:
            print("no image files")

        self.pointer = (self.pointer + 1) % self.num_batch
        return sent, lab, features

    def reset_pointer(self):
        self.pointer = 0
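
# Toy sketch (not from the original source) of the label layout built in
# load_train_data(): one-hot [0, 1] for real rows, [1, 0] for generated rows,
# concatenated in the same row order as the sentences.
import numpy as np
positive = np.array([[1, 2], [3, 4]])    # stand-ins for real samples
negative = np.array([[5, 6], [7, 8]])    # stand-ins for generated samples
labels = np.array([[0, 1]] * len(positive) + [[1, 0]] * len(negative))
sentences = np.concatenate([positive, negative], 0)  # rows align with labels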
Example #4
# (Imports as in Example #1; `shuffle` is assumed to be sklearn.utils.shuffle.)
class DataLoader():
    def __init__(self, config, batch_size, seq_length, end_token=0):
        self.config = config
        self.batch_size = batch_size
        self.token_stream = []
        self.seq_length = seq_length
        self.end_token = end_token
        self.image_batch = None
        self.feature_batch = None
        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        net = VGG19(weights='imagenet')
        self.trained_model = Model(inputs=net.input,
                                   outputs=net.get_layer('fc2').output)

    def get_imagefeatures_vgg19(self, image_files, feature_files):
        #print("to extract features...")
        return self.image_loader.extract_features_vgg19(
            self.trained_model, image_files, feature_files,
            self.batch_size)  #extract image features using vgg19

    def next_batch(self):
        seq = self.sequence_batch[self.pointer]
        imgs = None
        # feat_file and conv are returned below, so initialize them for the
        # no-image path (previously a NameError when image_batch was None).
        feat_file = None
        conv = None

        if self.image_batch is not None:
            imgs = self.image_batch[self.pointer]
            feat_file = self.feature_batch[self.pointer]
            conv = np.array(self.get_imagefeatures_vgg19(imgs, feat_file))
        else:
            print("no image files")

        self.pointer = (self.pointer + 1) % self.num_batch

        return seq, imgs, feat_file, conv

    def reset_pointer(self):
        self.pointer = 0

    def create_shuffled_batches(self, with_image=True):

        self.pointer = 0
        config = self.config

        data = np.load(config.temp_data_file, allow_pickle=True).item()
        word_idxs = data['word_idxs']
        sent_lens = data['sentence_len']
        print("len word_idxs: " + str(len(word_idxs)))

        self.num_batch = int(len(word_idxs) / self.batch_size)
        print("num batch " + str(self.num_batch))
        print('batch_size' + str(self.batch_size))
        print(self.num_batch * self.batch_size)
        word_idxs = word_idxs[:self.num_batch * self.batch_size]

        if with_image:
            with open(config.temp_image_file) as ifile:
                image_files = ifile.read().splitlines()
            with open(config.temp_feature_file) as ffile:
                feature_files = ffile.read().splitlines()

            image_files = image_files[:self.num_batch * self.batch_size]
            feature_files = feature_files[:self.num_batch * self.batch_size]

            print("len image files: " + str(len(image_files)))
            print("len feature files: " + str(len(feature_files)))
            print("len word_idxs: " + str(len(word_idxs)))

            word_idxs, feature_files, image_files = shuffle(
                word_idxs, feature_files, image_files)

            self.sequence_batch = np.array(
                np.split(word_idxs, self.num_batch, 0))
            self.image_batch = np.array(
                np.split(np.array(image_files), self.num_batch, 0))
            self.feature_batch = np.array(
                np.split(np.array(feature_files), self.num_batch, 0))

        else:
            word_idxs = shuffle(word_idxs)
            self.sequence_batch = np.split(word_idxs, self.num_batch, 0)

        print('shape of sequence_batch: ' + str(np.shape(self.sequence_batch)))
        # The image/feature batches exist only when with_image=True.
        if with_image:
            print('shape of image_batch: ' + str(self.image_batch.shape))
            print('shape of feature_batch: ' + str(self.feature_batch.shape))

    def get_sample_features(self):
        # `config` was unbound here; use the instance's config.
        data = np.load(self.config.temp_sample_image_file,
                       allow_pickle=True).item()
        imgs = data['images']
        features = data['features']

        return imgs, features

    def reset_image_pointer(self):
        self.image_pointer = 0
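
# Sketch of the joint shuffle used by create_shuffled_batches(), assuming
# `shuffle` is sklearn.utils.shuffle: all arrays are permuted with one shared
# random order, so row i of each output still refers to the same sample.
import numpy as np
from sklearn.utils import shuffle
seqs = np.arange(6).reshape(3, 2)
paths = np.array(['a.jpg', 'b.jpg', 'c.jpg'])   # toy file names
seqs_s, paths_s = shuffle(seqs, paths, random_state=0)
# row i of seqs_s still corresponds to paths_s[i]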
Example #5
# (Imports as in Example #1.)
class DataLoader():
    def __init__(self, config, batch_size, seq_length, end_token=0):
        self.config = config
        self.batch_size = batch_size
        self.token_stream = []
        self.seq_length = seq_length
        self.end_token = end_token
        self.image_batch = None
        self.feature_batch = None
        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        net = VGG19(weights='imagenet')
        self.trained_model = Model(inputs=net.input,
                                   outputs=net.get_layer('fc2').output)

    def get_imagefeatures_vgg19(self, image_files, feature_files):
        return self.image_loader.extract_features_vgg19(
            self.trained_model, image_files, feature_files,
            self.batch_size)  # extract image features using vgg19


    def create_batches(self, config, with_image):
        self.token_stream = []

        with open(config.temp_oracle_file, 'r') as raw:
            for line in raw:
                line = line.strip().split()
                parse_line = [int(x) for x in line]
                if len(parse_line) > self.seq_length:
                    self.token_stream.append(parse_line[:self.seq_length])
                else:
                    # Pad to seq_length with the end token; after padding the
                    # line is exactly seq_length long, so the old equality
                    # check was redundant.
                    while len(parse_line) < self.seq_length:
                        parse_line.append(self.end_token)
                    self.token_stream.append(parse_line)

        self.num_batch = int(len(self.token_stream) / self.batch_size)
        self.token_stream = self.token_stream[:self.num_batch * self.batch_size]
        self.sequence_batch = np.split(np.array(self.token_stream), self.num_batch, 0)
        self.pointer = 0

        if with_image:
            with open(config.temp_image_file) as ifile:
                self.image_files = ifile.read().splitlines()
            with open(config.temp_feature_file) as ffile:
                self.feature_files = ffile.read().splitlines()

            self.image_files = self.image_files[:self.num_batch * self.batch_size]
            self.image_batch = np.split(np.array(self.image_files), self.num_batch, 0)
            self.feature_files = self.feature_files[:self.num_batch * self.batch_size]
            self.feature_batch = np.split(np.array(self.feature_files), self.num_batch, 0)

    def create_batches_v2(self, config, with_image):
        data = np.load(config.temp_data_file, allow_pickle=True).item()
        word_idxs = data['word_idxs']
        sent_lens = data['sentence_len']

        self.num_batch = int(len(word_idxs) / self.batch_size)
        word_idxs = word_idxs[:self.num_batch * self.batch_size]
        self.sequence_batch = np.split(word_idxs, self.num_batch, 0)
        self.pointer = 0

        if with_image:
            with open(config.temp_image_file) as ifile:
                self.image_files = ifile.read().splitlines()
            with open(config.temp_feature_file) as ffile:
                self.feature_files = ffile.read().splitlines()

            self.image_files = self.image_files[:self.num_batch * self.batch_size]
            self.image_batch = np.split(np.array(self.image_files), self.num_batch, 0)
            self.feature_files = self.feature_files[:self.num_batch * self.batch_size]
            self.feature_batch = np.split(np.array(self.feature_files), self.num_batch, 0)

    def next_batch(self):
        ret = self.sequence_batch[self.pointer]
        imgs = None
        features = None
        if self.image_batch:
            imgs = self.image_batch[self.pointer]
            feature_files = self.feature_batch[self.pointer]
            features = self.get_imagefeatures_vgg19(imgs, feature_files)
        else:
            print("no image files")
        self.pointer = (self.pointer + 1) % self.num_batch

        return ret, features

    def reset_pointer(self):
        self.pointer = 0
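
# The pad/truncate rule from create_batches() above as a standalone one-liner
# (end_token=0 as in __init__; the token line is a toy value):
seq_length, end_token = 5, 0
parse_line = [3, 1, 4]
fixed = (parse_line + [end_token] * seq_length)[:seq_length]
assert fixed == [3, 1, 4, 0, 0]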