def embedFact(self, factIdx):
    '''Embed the fact for this round: the caption at round 0, the
    previous (question, answer) pair for every later round'''
    # Caption
    if factIdx == 0:
        seq, seqLens = self.captionEmbed, self.captionLens
        # states: final hidden and cell states (all RNN layers);
        # factEmbed: last layer's output at each sequence's true end
        factEmbed, states = utils.dynamicRNN(
            self.factRNN, seq, seqLens, returnStates=True)
    # QA pairs
    elif factIdx > 0:
        quesTokens, quesLens = \
            self.questionTokens[factIdx - 1], self.questionLens[factIdx - 1]
        ansTokens, ansLens = \
            self.answerTokens[factIdx - 1], self.answerLengths[factIdx - 1]
        # Concatenate the non-pad (question, answer) tokens and right-pad
        # the result with 0 up to the longest combined length in the batch
        qaTokens = utils.concatPaddedSequences(
            quesTokens, quesLens, ansTokens, ansLens, padding='right')
        qa = self.wordEmbed(qaTokens)
        qaLens = quesLens + ansLens
        # states: hidden and cell states, each [numLayers, batch, rnnHiddenSize]
        # (e.g. 2 x 20 x 512); qaEmbed: rnn output [batch, rnnHiddenSize]
        qaEmbed, states = utils.dynamicRNN(
            self.factRNN, qa, qaLens, returnStates=True)
        factEmbed = qaEmbed
    factRNNstates = states  # (hidden, cell) tuple
    self.factEmbeds.append((factEmbed, factRNNstates))
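# NOTE: a minimal sketch of what utils.concatPaddedSequences is assumed to
# do; the actual helper in utils may differ in detail. It splices each
# answer's tokens directly after its question's last non-pad token, then
# right-pads every row of the batch back to a common length.
import torch

def concatPaddedSequencesSketch(seq1, seqLens1, seq2, seqLens2, padding='right'):
    # Only padding='right' is sketched here
    batchSize = seq1.size(0)
    maxLen = int((seqLens1 + seqLens2).max())
    out = seq1.new_zeros(batchSize, maxLen)   # 0 is the pad token id
    for i in range(batchSize):
        l1, l2 = int(seqLens1[i]), int(seqLens2[i])
        out[i, :l1] = seq1[i, :l1]            # question tokens
        out[i, l1:l1 + l2] = seq2[i, :l2]     # answer tokens appended
    return out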
def embedAnswer(self, aIdx):
    '''Embed answers'''
    ansIn = self.answerEmbeds[aIdx]
    ansLens = self.answerLengths[aIdx]
    # states: final hidden and cell states; aEmbed: last layer's output
    aEmbed, states = utils.dynamicRNN(
        self.ansRNN, ansIn, ansLens, returnStates=True)
    ansRNNStates = states
    self.answerRNNStates.append((aEmbed, ansRNNStates))
def embedQuestion(self, qIdx):
    '''Embed questions'''
    # Inputs are padded to the longest sentence (chat_processed_data.h5);
    # dynamicRNN packs them internally, e.g. via
    # nn.utils.rnn.pack_padded_sequence(quesIn, quesLens, batch_first=True)
    quesIn = self.questionEmbeds[qIdx]
    quesLens = self.questionLens[qIdx]
    if self.useIm == 'early':
        # Early fusion: append the image embedding at every time step
        image = self.imageEmbed.unsqueeze(1).repeat(1, quesIn.size(1), 1)
        quesIn = torch.cat([quesIn, image], 2)
    qEmbed, states = utils.dynamicRNN(
        self.quesRNN, quesIn, quesLens, returnStates=True)
    quesRNNstates = states  # (hidden, cell), each [numLayers, batch, rnnHiddenSize]
    self.questionRNNStates.append((qEmbed, quesRNNstates))
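# NOTE: a minimal sketch of the contract assumed for utils.dynamicRNN,
# built on nn.utils.rnn.pack_padded_sequence as the comment above hints;
# the real helper may handle sorting, devices, or RNN types differently.
import torch
import torch.nn.utils.rnn as rnn_utils

def dynamicRNNSketch(rnnModel, seqIn, seqLens, returnStates=False):
    # Sort by length (descending) so the padded batch can be packed
    sortedLens, fwdOrder = torch.sort(seqLens, descending=True)
    _, bwdOrder = torch.sort(fwdOrder)              # inverse permutation
    packed = rnn_utils.pack_padded_sequence(
        seqIn[fwdOrder], sortedLens.cpu(), batch_first=True)
    _, (h, c) = rnnModel(packed)                    # each [numLayers, batch, hidden]
    h, c = h[:, bwdOrder], c[:, bwdOrder]           # restore original batch order
    output = h[-1]                                  # last layer's final hidden state
    return (output, (h, c)) if returnStates else output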