def get_merge_activations(self):
        '''
        Build the merge-activations model and dump its activations for the
        validation split.

        The VGG image representation is merged with the word embeddings
        inside the model, so the data must be fed as a list whose element
        order is _crucial_.
        '''

        self.data_generator = VisualWordDataGenerator(
            self.args,
            input_dataset=self.args.checkpoint_dataset,
            hsn=self.args.hidden_size)
        self.data_generator.set_vocabulary(self.args.checkpoint)
        self.vocab_len = len(self.data_generator.index2word)

        # Source-language hidden states are only fed in when requested.
        hsn_size = self.data_generator.hsn_size if self.use_sourcelang else 0

        lstm = models.OneLayerLSTM(self.args.hidden_size,
                                   self.vocab_len,
                                   self.args.dropin,
                                   self.args.optimiser,
                                   self.args.l2reg,
                                   hsn_size=hsn_size,
                                   weights=self.args.checkpoint,
                                   gru=self.args.gru)

        self.model = lstm.buildMergeActivations(
            use_image=self.use_image,
            use_sourcelang=self.use_sourcelang)

        self.generate_activations('val')
    def get_hidden_activations(self):
        '''
        Build an NIC model from the best checkpoint and dump its hidden
        (HSN) activations for the train and val splits.

        When predicted tokens are requested (and images are in use), an
        additional full generation model is built so activations can be
        collected over sequences the model generates itself.
        '''

        self.data_generator = VisualWordDataGenerator(self.args,
                                                      self.args.dataset)
        self.args.checkpoint = self.find_best_checkpoint()
        self.data_generator.set_vocabulary(self.args.checkpoint)
        self.vocab_len = len(self.data_generator.index2word)

        # Unroll the recurrence for as many steps as we will generate, or
        # for the longest gold sequence in the dataset otherwise.
        if self.args.use_predicted_tokens:
            t = self.args.generation_timesteps
        else:
            t = self.data_generator.max_seq_len

        m = models.NIC(self.args.embed_size, self.args.hidden_size,
                       self.vocab_len,
                       self.args.dropin,
                       self.args.optimiser, self.args.l2reg,
                       weights=self.args.checkpoint,
                       gru=self.args.gru,
                       t=t)

        self.fhs = m.buildHSNActivations(use_image=self.use_image)

        # Only build the generation model when predicted tokens are wanted
        # and an image is available to condition the generation on.
        # (PEP 8: test booleans directly rather than comparing `== False`.)
        if self.args.use_predicted_tokens and not self.args.no_image:
            gen_m = models.NIC(self.args.embed_size, self.args.hidden_size,
                               self.vocab_len,
                               self.args.dropin,
                               self.args.optimiser, self.args.l2reg,
                               weights=self.args.checkpoint,
                               gru=self.args.gru,
                               t=self.args.generation_timesteps)
            self.full_model = gen_m.buildKerasModel(use_image=self.use_image)

        self.new_generate_activations('train')
        self.new_generate_activations('val')
# Exemplo n.º 3 (snippet-site separator left in by scraping; score: 0)
 def prepare_datagenerator(self):
     '''
     Initialise the data generator with the vocabulary restored from the
     best checkpoint, and cache the vocabulary lookup tables on this
     instance.
     '''
     generator = VisualWordDataGenerator(self.args, self.args.dataset)
     self.args.checkpoint = self.find_best_checkpoint()
     generator.set_vocabulary(self.args.checkpoint)
     self.data_gen = generator
     self.index2word = generator.index2word
     self.word2index = generator.word2index
     self.vocab_len = len(self.index2word)
# Exemplo n.º 4 (snippet-site separator left in by scraping; score: 0)
    def prepare_datagenerator(self):
        '''
        Initialise the data generator and its datastructures, unless a valid
        data generator was already passed into the
        GroundedTranslation.__init() function.
        '''

        # Initialise the data generator if it has not yet been initialised.
        # PEP 8: compare to None with `is`, not `==`.
        if self.data_generator is None:
            self.data_generator = VisualWordDataGenerator(
                self.args, self.args.dataset)

            # Extract the working vocabulary from the training dataset,
            # unless an existing vocabulary file was supplied.
            if self.args.existing_vocab != "":
                self.data_generator.set_vocabulary(self.args.existing_vocab)
            else:
                self.data_generator.extract_vocabulary()

        # Cache the vocabulary size regardless of how the generator was made.
        self.V = self.data_generator.get_vocab_size()