示例#1
0
    def classifier(self, features, inDim, aEmbeddings=None):
        """Build the output classifier and return the answer logits.

        ``features`` is passed through ``ops.FCLayer`` with layer sizes
        ``[inDim] + config.outClassifierDims + [last]``, where ``last`` is
        ``config.answerWordsNum`` when ``config.answerMod == "NON"`` and
        ``config.wrdEmbDim`` otherwise. In the latter case the projected
        features are additionally combined with ``aEmbeddings`` through
        ``ops.mul`` / ``ops.inter2logits`` and a bias of shape
        ``(config.answerWordsNum,)`` is added.

        Args:
            features: input handed to the fully-connected stack.
            inDim: size of the first entry of the layer-size list.
            aEmbeddings: answer embeddings; only used when
                ``config.answerMod != "NON"``.

        Returns:
            The logits produced inside the ``"classifier"`` variable scope.
        """
        with tf.variable_scope("classifier"):
            outDim = config.answerWordsNum
            dims = [inDim] + config.outClassifierDims + [outDim]
            if config.answerMod != "NON":
                # Project into the word-embedding space instead of directly
                # onto the answer vocabulary.
                dims[-1] = config.wrdEmbDim

            logits = ops.FCLayer(
                features,
                dims,
                batchNorm=self.batchNorm if config.outputBN else None,
                dropout=self.dropouts["output"])

            if config.answerMod != "NON":
                logits = tf.nn.dropout(logits, self.dropouts["output"])
                interactions = ops.mul(aEmbeddings,
                                       logits,
                                       dims[-1],
                                       interMod=config.answerMod)
                logits = ops.inter2logits(interactions, dims[-1], sumMod="SUM")
                # The bias has one entry per answer, so its shape uses outDim.
                # The previous code referenced an undefined ``outputDim``
                # here, which raised NameError on this path.
                logits += ops.getBias((outDim, ), "ans")

        return logits
示例#2
0
    def classifier(self, features, inDim, choices=None, choicesNums=None):
        """Build the output classifier and return the answer logits.

        The features go through ``ops.FCLayer``. When
        ``config.answerMod == "NON"`` that output is returned as is.
        Otherwise the last layer has size ``config.wrdAEmbDim``, and its
        output is passed through ``ops.gatedAct`` and dropout, combined with
        ``choices`` via ``ops.mul`` and reduced with ``ops.inter2logits``.
        For ``config.ansFormat == "oe"`` a bias of shape
        ``(config.answerWordsNum,)`` is added; for any other format the
        logits are masked with ``ops.expMask`` using ``choicesNums``.

        Args:
            features: input handed to the fully-connected stack.
            inDim: size of the first entry of the layer-size list.
            choices: candidate answers fed to ``ops.mul``; only used when
                ``config.answerMod != "NON"``.
            choicesNums: passed to ``ops.expMask`` when the answer format is
                not ``"oe"``.

        Returns:
            The logits produced inside the ``"classifier"`` variable scope.
        """
        with tf.variable_scope("classifier"):
            numAnswers = config.answerWordsNum
            layerDims = [inDim] + config.outClassifierDims + [numAnswers]
            embedAnswers = config.answerMod != "NON"
            if embedAnswers:
                layerDims[-1] = config.wrdAEmbDim

            hidden = ops.FCLayer(
                features,
                layerDims,
                batchNorm=self.batchNorm if config.outputBN else None,
                dropout=self.dropouts["output"])

            # Plain classification: the FC stack already yields the logits.
            if not embedAnswers:
                return hidden

            activation = ops.gatedAct(config.outAct, gate=config.outGate)
            hidden = tf.nn.dropout(activation(hidden), self.dropouts["output"])

            interactions, interDim = ops.mul(
                choices,
                hidden,
                layerDims[-1],
                interMod=config.answerMod,
                concat={"x": config.answerBias})
            scores = ops.inter2logits(interactions,
                                      interDim,
                                      sumMod=config.answerSumMod)

            if config.ansFormat == "oe":
                scores += ops.getBias((numAnswers, ), "ans")
                return scores

            return ops.expMask(scores, choicesNums)
示例#3
0
    def memAutoEnc(newMemory, info, control, name="", reuse=None):
        """Compute an auto-encoding loss tying the memory back to the control.

        Either ``info`` (when ``config.autoEncMemInputs == "INFO"``) or
        ``newMemory`` is projected with ``ops.linear`` from ``config.memDim``
        to ``config.ctrlDim``. The loss then depends on
        ``config.autoEncMemLoss``:

        * ``"CONT"``: mean squared difference between ``control`` and the
          projection.
        * ``"PROB"``: mean softmax cross-entropy between the last stored
          question attention and logits computed over the question words.
        * anything else: mean squared difference between ``control`` and an
          attention-weighted summary of the question words.

        Args:
            newMemory: candidate auto-encoder input.
            info: alternative auto-encoder input.
            control: reconstruction target for the squared-difference losses.
            name: suffix appended to the ``"memAutoEnc"`` variable scope name.
            reuse: forwarded to ``tf.variable_scope``.

        Returns:
            The loss value computed by the selected branch.
        """
        # NOTE(review): the signature has no ``self`` parameter, yet every
        # branch other than "CONT" reads ``self.*``. Unless ``self`` is bound
        # in a scope not visible here, those branches raise NameError.
        # Confirm against the call sites before changing the signature.
        with tf.variable_scope("memAutoEnc" + name, reuse=reuse):
            source = info if config.autoEncMemInputs == "INFO" else newMemory
            encoded = ops.linear(source,
                                 config.memDim,
                                 config.ctrlDim,
                                 act=config.autoEncMemAct,
                                 name="aeMem")

            # Direct reconstruction of the control vector.
            if config.autoEncMemLoss == "CONT":
                return tf.reduce_mean(tf.squared_difference(control, encoded))

            interactions, interDim = ops.mul(
                self.questionCntxWords,
                encoded,
                config.ctrlDim,
                concat={"x": config.autoEncMemCnct},
                mulBias=config.mulBias,
                name="aeMem")
            wordLogits = ops.inter2logits(interactions, interDim)
            wordLogits = self.expMask(wordLogits, self.questionLengths)

            # Reconstruct the word-attention distribution.
            if config.autoEncMemLoss == "PROB":
                crossEntropy = tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.attentions["question"][-1],
                    logits=wordLogits)
                return tf.reduce_mean(crossEntropy)

            # Reconstruct the control through the word attentions.
            wordAttention = tf.nn.softmax(wordLogits)
            rebuilt = ops.att2Smry(wordAttention, self.questionCntxWords)
            return tf.reduce_mean(tf.squared_difference(control, rebuilt))
示例#4
0
    def control(self,
                controlInput,
                inWords,
                outWords,
                questionLengths,
                control,
                contControl=None,
                name="",
                reuse=None):
        """Compute the next control state by attending over the question words.

        Step 1 builds a "continuous" control. It starts from
        ``controlInput``; when ``config.controlFeedPrev`` is set it is instead
        derived from the previous control (``control`` or ``contControl``,
        chosen by ``config.controlFeedPrevAtt``), optionally concatenated
        with ``controlInput``, and passed through ``ops.linear``.

        Step 2 multiplies that continuous control with ``inWords``,
        optionally concatenates ``inWords`` and applies a projection, turns
        the result into logits, masks them with ``questionLengths`` and
        applies a softmax. The attention is appended to
        ``self.attentions["question"]`` and used to summarise ``outWords``.

        Args:
            controlInput: input for the current step.
            inWords: words the attention logits are computed from.
            outWords: words summarised by the resulting attention.
            questionLengths: lengths forwarded to ``ops.expMask``.
            control: previous control state.
            contControl: previous continuous control state; read only when
                ``config.controlFeedPrev`` is set and
                ``config.controlFeedPrevAtt`` is not.
            name: suffix appended to the ``"control"`` variable scope name.
            reuse: forwarded to ``tf.variable_scope``.

        Returns:
            A ``(newControl, newContControl)`` pair. ``newControl`` is the
            attention summary, or the continuous control when
            ``config.controlContinuous`` is set.
        """
        with tf.variable_scope("control" + name, reuse=reuse):
            ## Step 1: continuous control from the previous control and question.
            contState = controlInput
            if config.controlFeedPrev:
                mergeDim = config.ctrlDim
                if config.controlFeedPrevAtt:
                    contState = control
                else:
                    contState = contControl

                if config.controlFeedInputs:
                    contState = tf.concat([contState, controlInput], axis=-1)
                    mergeDim += config.ctrlDim

                # Merge the inputs back down to the control dimension.
                contState = ops.linear(contState,
                                       mergeDim,
                                       config.ctrlDim,
                                       act=config.controlContAct,
                                       name="contControl")

            ## Step 2: attention over the words, then summarise them.
            # Whichever path ran above, the state size here is config.ctrlDim.
            interDim = config.ctrlDim
            interactions = tf.expand_dims(contState, axis=1) * inWords

            if config.controlConcatWords:
                interactions = tf.concat([interactions, inWords], axis=-1)
                interDim += config.ctrlDim

            if config.controlProj:
                interactions = ops.linear(interactions,
                                          interDim,
                                          config.ctrlDim,
                                          act=config.controlProjAct)
                interDim = config.ctrlDim

            wordLogits = ops.inter2logits(interactions, interDim)
            maskedLogits = ops.expMask(wordLogits, questionLengths)
            attention = tf.nn.softmax(maskedLogits)
            self.attentions["question"].append(attention)

            summary = ops.att2Smry(attention, outWords)

            # Ablation: expose the pre-attention continuous control instead.
            newControl = contState if config.controlContinuous else summary

        return newControl, contState