def classifier(self, features, inDim, aEmbeddings=None):
    with tf.variable_scope("classifier"):
        outDim = config.answerWordsNum
        dims = [inDim] + config.outClassifierDims + [outDim]
        if config.answerMod != "NON":
            dims[-1] = config.wrdEmbDim

        logits = ops.FCLayer(features, dims,
            batchNorm=self.batchNorm if config.outputBN else None,
            dropout=self.dropouts["output"])

        if config.answerMod != "NON":
            logits = tf.nn.dropout(logits, self.dropouts["output"])
            interactions = ops.mul(aEmbeddings, logits, dims[-1], interMod=config.answerMod)
            logits = ops.inter2logits(interactions, dims[-1], sumMod="SUM")
            logits += ops.getBias((outDim, ), "ans")

            # answersWeights = tf.transpose(aEmbeddings)
            # if config.answerMod == "BL":
            #     Wans = ops.getWeight((dims[-1], config.wrdEmbDim), "ans")
            #     logits = tf.matmul(logits, Wans)
            # elif config.answerMod == "DIAG":
            #     Wans = ops.getWeight((config.wrdEmbDim, ), "ans")
            #     logits = logits * Wans
            # logits = tf.matmul(logits, answersWeights)

    return logits
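# A minimal, self-contained sketch (not part of the original model) of the
# answerMod path above: the projected features are scored against every
# answer-word embedding by an elementwise product summed over the embedding
# dimension, plus a per-answer bias, giving one logit per vocabulary word.
# Assumes TF 1.x; all names and shapes below are invented for illustration,
# standing in for ops.mul, ops.inter2logits and ops.getBias.
def _answerEmbeddingScoresSketch():
    import tensorflow as tf
    batch, vocab, embDim = 2, 10, 8
    projected = tf.random_uniform((batch, embDim))              # FCLayer output, projected to wrdEmbDim
    aEmbeddings = tf.random_uniform((vocab, embDim))            # one embedding per answer word
    interactions = tf.expand_dims(projected, 1) * aEmbeddings   # (batch, vocab, embDim)
    logits = tf.reduce_sum(interactions, axis=-1)               # sumMod="SUM" -> (batch, vocab)
    logits += tf.zeros((vocab, ))                               # stand-in for the "ans" bias
    return logits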
def classifier(self, features, inDim, choices=None, choicesNums=None):
    with tf.variable_scope("classifier"):
        outDim = config.answerWordsNum
        dims = [inDim] + config.outClassifierDims + [outDim]
        if config.answerMod != "NON":
            dims[-1] = config.wrdAEmbDim

        logits = ops.FCLayer(features, dims,
            batchNorm=self.batchNorm if config.outputBN else None,
            dropout=self.dropouts["output"])

        if config.answerMod != "NON":
            logits = ops.gatedAct(config.outAct, gate=config.outGate)(logits)
            logits = tf.nn.dropout(logits, self.dropouts["output"])
            concat = {"x": config.answerBias}
            interactions, interDim = ops.mul(choices, logits, dims[-1],
                interMod=config.answerMod, concat=concat)
            logits = ops.inter2logits(interactions, interDim, sumMod=config.answerSumMod)
            if config.ansFormat == "oe":
                logits += ops.getBias((outDim, ), "ans")
            else:
                logits = ops.expMask(logits, choicesNums)

    return logits
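# A minimal sketch (assumes TF 1.x; values invented for illustration) of the
# expMask step in the multiple-choice branch above: candidate lists are padded
# to a fixed length, so logits past each example's true choice count are pushed
# toward -inf before any softmax / argmax, standing in for ops.expMask.
def _choiceMaskSketch():
    import tensorflow as tf
    logits = tf.constant([[1.0, 2.0, 3.0, 4.0],
                          [0.5, 1.5, 2.5, 3.5]])
    choicesNums = tf.constant([4, 2])                   # valid choices per example
    mask = tf.sequence_mask(choicesNums, maxlen=4)      # True at valid positions
    masked = tf.where(mask, logits, tf.fill(tf.shape(logits), -1e30))
    return masked                                       # padded choices can no longer win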
def memAutoEnc(self, newMemory, info, control, name="", reuse=None):
    with tf.variable_scope("memAutoEnc" + name, reuse=reuse):
        # inputs to auto encoder
        features = info if config.autoEncMemInputs == "INFO" else newMemory
        features = ops.linear(features, config.memDim, config.ctrlDim,
            act=config.autoEncMemAct, name="aeMem")

        # reconstruct control
        if config.autoEncMemLoss == "CONT":
            loss = tf.reduce_mean(tf.squared_difference(control, features))
        else:
            interactions, dim = ops.mul(self.questionCntxWords, features, config.ctrlDim,
                concat={"x": config.autoEncMemCnct}, mulBias=config.mulBias, name="aeMem")

            logits = ops.inter2logits(interactions, dim)
            logits = self.expMask(logits, self.questionLengths)

            # reconstruct word attentions
            if config.autoEncMemLoss == "PROB":
                loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.attentions["question"][-1], logits=logits))

            # reconstruct control through word attentions
            else:
                attention = tf.nn.softmax(logits)
                summary = ops.att2Smry(attention, self.questionCntxWords)
                loss = tf.reduce_mean(tf.squared_difference(control, summary))

    return loss
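# A minimal sketch (assumes TF 1.x; shapes invented for illustration) of the
# default autoEncMemLoss branch above: attend over the question context words
# with the projected memory, summarize them, and penalize the squared distance
# between that summary and the control state. tf.reduce_sum over the weighted
# words stands in for ops.att2Smry.
def _memAutoEncLossSketch():
    import tensorflow as tf
    batch, length, ctrlDim = 2, 5, 8
    logits = tf.random_uniform((batch, length))              # word-level interaction scores
    cntxWords = tf.random_uniform((batch, length, ctrlDim))  # questionCntxWords
    control = tf.random_uniform((batch, ctrlDim))
    attention = tf.nn.softmax(logits)                        # distribution over words
    summary = tf.reduce_sum(tf.expand_dims(attention, -1) * cntxWords, axis=1)
    loss = tf.reduce_mean(tf.squared_difference(control, summary))
    return loss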
def control(self, controlInput, inWords, outWords, questionLengths,
        control, contControl=None, name="", reuse=None):
    with tf.variable_scope("control" + name, reuse=reuse):
        dim = config.ctrlDim

        ## Step 1: compute "continuous" control state given previous control and question.
        # control inputs: question and previous control
        newContControl = controlInput
        if config.controlFeedPrev:
            newContControl = control if config.controlFeedPrevAtt else contControl
            if config.controlFeedInputs:
                newContControl = tf.concat([newContControl, controlInput], axis=-1)
                dim += config.ctrlDim

            # merge inputs together
            newContControl = ops.linear(newContControl, dim, config.ctrlDim,
                act=config.controlContAct, name="contControl")
            dim = config.ctrlDim

        ## Step 2: compute attention distribution over words and sum them up accordingly.
        # compute interactions with question words
        interactions = tf.expand_dims(newContControl, axis=1) * inWords

        # optionally concatenate words
        if config.controlConcatWords:
            interactions = tf.concat([interactions, inWords], axis=-1)
            dim += config.ctrlDim

        # optional projection
        if config.controlProj:
            interactions = ops.linear(interactions, dim, config.ctrlDim,
                act=config.controlProjAct)
            dim = config.ctrlDim

        # compute attention distribution over words and summarize them accordingly
        logits = ops.inter2logits(interactions, dim)
        # self.interL = (interW, interb)

        # if config.controlCoverage:
        #     logits += coverageBias * coverage

        attention = tf.nn.softmax(ops.expMask(logits, questionLengths))
        self.attentions["question"].append(attention)

        # if config.controlCoverage:
        #     coverage += attention # Add logits instead?

        newControl = ops.att2Smry(attention, outWords)

        # ablation: use continuous control (pre-attention) instead
        if config.controlContinuous:
            newControl = newContControl

    return newControl, newContControl
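# A minimal sketch (assumes TF 1.x; shapes invented for illustration) of
# Step 2 above: the continuous control interacts elementwise with each
# question word, the interactions are reduced to per-word logits, padded
# positions are masked out, and the attention-weighted sum of the words
# becomes the new control state. tf.layers.dense, the tf.where masking and
# the weighted sum stand in for ops.inter2logits, ops.expMask and ops.att2Smry.
def _controlAttentionSketch():
    import tensorflow as tf
    batch, length, ctrlDim = 2, 5, 8
    contControl = tf.random_uniform((batch, ctrlDim))
    words = tf.random_uniform((batch, length, ctrlDim))
    lengths = tf.constant([5, 3])                                   # true question lengths
    interactions = tf.expand_dims(contControl, axis=1) * words      # (batch, length, ctrlDim)
    logits = tf.squeeze(tf.layers.dense(interactions, 1), axis=-1)  # (batch, length)
    mask = tf.sequence_mask(lengths, maxlen=length)
    logits = tf.where(mask, logits, tf.fill(tf.shape(logits), -1e30))
    attention = tf.nn.softmax(logits)                               # zero weight on padding
    newControl = tf.reduce_sum(tf.expand_dims(attention, -1) * words, axis=1)
    return newControl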