def get_merge_activations(self):
    """Build the merge-activation model and dump activations for 'val'.

    In the model, the VGG image representation is merged with the word
    embeddings, so the data must be fed as a list in which the order of
    the elements is _crucial_.
    """
    self.data_generator = VisualWordDataGenerator(
        self.args,
        input_dataset=self.args.checkpoint_dataset,
        hsn=self.args.hidden_size)
    self.data_generator.set_vocabulary(self.args.checkpoint)
    self.vocab_len = len(self.data_generator.index2word)

    # Source-language hidden state is only wired in when requested;
    # otherwise the merge layer sees a zero-width source input.  # ick
    hsn_size = self.data_generator.hsn_size if self.use_sourcelang else 0

    builder = models.OneLayerLSTM(self.args.hidden_size,
                                  self.vocab_len,
                                  self.args.dropin,
                                  self.args.optimiser,
                                  self.args.l2reg,
                                  hsn_size=hsn_size,
                                  weights=self.args.checkpoint,
                                  gru=self.args.gru)
    self.model = builder.buildMergeActivations(
        use_image=self.use_image,
        use_sourcelang=self.use_sourcelang)
    self.generate_activations('val')
def get_hidden_activations(self):
    """Build the hidden-state extraction model and dump activations.

    In the model, the VGG image representation is merged with the word
    embeddings, so the data must be fed as a list in which the order of
    the elements is _crucial_.

    Side effects: sets self.data_generator, self.args.checkpoint,
    self.vocab_len, self.fhs and (optionally) self.full_model, then
    writes activations for the 'train' and 'val' splits.
    """
    self.data_generator = VisualWordDataGenerator(self.args,
                                                  self.args.dataset)
    self.args.checkpoint = self.find_best_checkpoint()
    self.data_generator.set_vocabulary(self.args.checkpoint)
    self.vocab_len = len(self.data_generator.index2word)

    # When sampling predicted tokens we unroll for the configured number
    # of generation timesteps; otherwise the gold sequence length bounds
    # the unroll.
    t = (self.args.generation_timesteps
         if self.args.use_predicted_tokens
         else self.data_generator.max_seq_len)

    m = models.NIC(self.args.embed_size, self.args.hidden_size,
                   self.vocab_len,
                   self.args.dropin,
                   self.args.optimiser, self.args.l2reg,
                   weights=self.args.checkpoint,
                   gru=self.args.gru,
                   t=t)
    self.fhs = m.buildHSNActivations(use_image=self.use_image)

    # Fixed: `== False` replaced with the idiomatic `not` (PEP 8).
    # A second, full generation model is only needed when we generate
    # the tokens ourselves and an image input is available.
    if self.args.use_predicted_tokens and not self.args.no_image:
        gen_m = models.NIC(self.args.embed_size, self.args.hidden_size,
                           self.vocab_len,
                           self.args.dropin,
                           self.args.optimiser, self.args.l2reg,
                           weights=self.args.checkpoint,
                           gru=self.args.gru,
                           t=self.args.generation_timesteps)
        self.full_model = gen_m.buildKerasModel(use_image=self.use_image)

    self.new_generate_activations('train')
    self.new_generate_activations('val')
def prepare_datagenerator(self):
    """Initialise the data generator and load the vocabulary from the
    best available checkpoint, caching the index/word mappings on self.
    """
    self.data_gen = VisualWordDataGenerator(self.args, self.args.dataset)
    self.args.checkpoint = self.find_best_checkpoint()
    self.data_gen.set_vocabulary(self.args.checkpoint)

    # Cache the vocabulary mappings locally for convenient access.
    index2word = self.data_gen.index2word
    self.vocab_len = len(index2word)
    self.index2word = index2word
    self.word2index = self.data_gen.word2index
def prepare_datagenerator(self):
    """Initialise the data generator and its datastructures, unless a
    valid data generator was already passed into the
    GroundedTranslation.__init() function.

    Side effects: may create self.data_generator, populates its
    vocabulary (either from an existing vocab file or by extraction
    from the training dataset), and sets self.V to the vocab size.
    """
    # Initialise the data generator if it has not yet been initialised.
    # Fixed: compare against the None singleton with `is`, not `==`
    # (PEP 8; `==` can be hijacked by a custom __eq__).
    if self.data_generator is None:
        self.data_generator = VisualWordDataGenerator(
            self.args, self.args.dataset)

    # Extract the working vocabulary from the training dataset, unless
    # the user supplied a pre-built vocabulary file.
    if self.args.existing_vocab != "":
        self.data_generator.set_vocabulary(self.args.existing_vocab)
    else:
        self.data_generator.extract_vocabulary()
    self.V = self.data_generator.get_vocab_size()