Example #1
    def trainFunc(self, inputPremise, inputHypothesis, yTarget, learnRate, gradMax,
                  L2regularization):
        premProject = T.dot(inputPremise, self.W_proj)
        hypoProject = T.dot(inputHypothesis, self.W_proj)

        sumPrem = premProject.sum(axis=1) + self.b_proj
        sumHypo = hypoProject.sum(axis=1) + self.b_proj # Should be dim (n, dimProject) where n is batch size

        concatVec = T.concatenate([sumPrem, sumHypo], axis=1)

        activeVec = T.tanh(concatVec)

        yPred = T.nnet.softmax(activeVec)
        entropy = T.nnet.categorical_crossentropy(yPred, yTarget).mean()
        cost = entropy + computeParamNorms([self.W_proj], L2regularization)

        costFunc = theano.function([inputPremise, inputHypothesis, yTarget], cost)

        grads, _ = computeGrads(inputPremise, inputHypothesis, yTarget,
                                cost, gradMax, self.params.values())

        fGradShared, fUpdate = rmsprop(grads, learnRate, inputPremise,
                                       inputHypothesis, yTarget, cost, self.params)

        return fGradShared, fUpdate, costFunc
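
Example #1 relies on helpers defined elsewhere in the repository (computeParamNorms, computeGrads, rmsprop) whose source is not shown in this listing. Below is a minimal sketch of what the first two presumably do, assuming the penalty is a scaled sum of squared parameter norms and that gradMax is used for elementwise gradient clipping; treat the bodies as illustrative, not as the repository's actual implementation.

import theano
import theano.tensor as T

def computeParamNorms(params, L2regularization):
    # Hypothetical sketch: the L2 penalty added to the cost, assumed to be
    # the regularization strength times the sum of squared parameter norms.
    return L2regularization * sum((param ** 2).sum() for param in params)

def computeGrads(inputPremise, inputHypothesis, yTarget, cost, gradMax, params):
    # Hypothetical sketch: gradients of the cost w.r.t. the parameters,
    # clipped elementwise to [-gradMax, gradMax]. The second return value is
    # assumed to be a compiled function for inspecting the gradients (the
    # caller above ignores it).
    grads = T.grad(cost, wrt=list(params))
    grads = [T.clip(g, -gradMax, gradMax) for g in grads]
    gradsFn = theano.function([inputPremise, inputHypothesis, yTarget], grads)
    return grads, gradsFn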
Example #2
    def trainFunc(self, inputPremise, inputHypothesis, yTarget, learnRate,
                  gradMax, L2regularization):
        premProject = T.dot(inputPremise, self.W_proj)
        hypoProject = T.dot(inputHypothesis, self.W_proj)

        sumPrem = premProject.sum(axis=1) + self.b_proj
        sumHypo = hypoProject.sum(
            axis=1
        ) + self.b_proj  # Should be dim (n, dimProject) where n is batch size

        concatVec = T.concatenate([sumPrem, sumHypo], axis=1)

        activeVec = T.tanh(concatVec)

        yPred = T.nnet.softmax(activeVec)
        entropy = T.nnet.categorical_crossentropy(yPred, yTarget).mean()
        cost = entropy + computeParamNorms([self.W_proj], L2regularization)

        costFunc = theano.function([inputPremise, inputHypothesis, yTarget],
                                   cost)

        grads, _ = computeGrads(inputPremise, inputHypothesis, yTarget, cost,
                                gradMax, self.params.values())

        fGradShared, fUpdate = rmsprop(grads, learnRate, inputPremise,
                                       inputHypothesis, yTarget, cost,
                                       self.params)

        return fGradShared, fUpdate, costFunc
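
The fGradShared / fUpdate pair returned by rmsprop matches the two-phase optimizer interface popularized by the Theano LSTM tutorial: one compiled function accumulates gradient statistics and returns the cost, the other applies the parameter update from shared state. A sketch along those lines, assuming learnRate arrives as a plain Python float and params is an OrderedDict of shared variables (this is an assumed reconstruction, not the repository's code):

import numpy as np
import theano
import theano.tensor as T

def rmsprop(grads, learnRate, inputPremise, inputHypothesis, yTarget, cost, params):
    # Shared accumulators for the raw gradients and their running moments.
    zipped_grads = [theano.shared(np.zeros_like(p.get_value()), name='%s_grad' % k)
                    for k, p in params.items()]
    running_grads = [theano.shared(np.zeros_like(p.get_value()), name='%s_rgrad' % k)
                     for k, p in params.items()]
    running_grads2 = [theano.shared(np.zeros_like(p.get_value()), name='%s_rgrad2' % k)
                      for k, p in params.items()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    # Phase 1: compute the cost and stash gradient statistics in shared state.
    fGradShared = theano.function([inputPremise, inputHypothesis, yTarget], cost,
                                  updates=zgup + rgup + rg2up,
                                  name='rmsprop_f_grad_shared')

    # Phase 2: apply the RMSProp step using only the shared accumulators.
    updir = [theano.shared(np.zeros_like(p.get_value()), name='%s_updir' % k)
             for k, p in params.items()]
    updir_new = [(ud, 0.9 * ud - learnRate * zg / T.sqrt(rg2 - rg ** 2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                            running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(params.values(), updir_new)]
    fUpdate = theano.function([], [], updates=updir_new + param_up,
                              name='rmsprop_f_update')

    return fGradShared, fUpdate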
Example #3
    def costFunc(
        self,
        inputPremise,
        inputHypothesis,
        yTarget,
        layer,
        L2regularization,
        dropoutRate,
        premiseOutputs,
        batchSize,
        sentenceAttention=False,
        wordwiseAttention=False,
        numTimestepsHypothesis=1,
        numTimestepsPremise=1,
    ):
        """
        Compute end-to-end cost function for a collection of input data.
        :param layer: whether we are doing a forward computation in the
                        premise or hypothesis layer
        :return: Symbolic expression for cost function as well as theano function
                 for computing cost expression.
        """
        if layer == "premise":
            _ = self.forwardRun(inputPremise, numTimestepsPremise)
        elif layer == "hypothesis":
            timestepOut, _ = self.forwardRun(inputHypothesis, numTimestepsHypothesis)

        # Apply sentence level attention -- notation consistent with paper
        if sentenceAttention:
            hstar = self.applySentenceAttention(premiseOutputs, self.finalOutputVal, numTimestepsPremise)
            self.finalOutputVal = hstar

        # Apply word by word attention
        if wordwiseAttention:
            hstar = self.applyWordwiseAttention(
                premiseOutputs,
                timestepOut[0],
                self.finalOutputVal,
                batchSize,
                numTimestepsPremise,
                numTimestepsHypothesis,
            )
            self.finalOutputVal = hstar

        # Apply dropout to finalOutputVal before projecting to categories
        self.finalOutputVal = self.applyDropout(self.finalOutputVal, self.dropoutMode, dropoutRate)
        catOutput = self.projectToCategories()
        cost = self.computeCrossEntropyCost(catOutput, yTarget)

        # Get params specific to cell and add L2 regularization to cost
        LSTMparams = [self.params[cParam] for cParam in self.LSTMcellParams]
        cost = cost + computeParamNorms(LSTMparams, L2regularization)
        return (
            cost,
            theano.function(
                [inputPremise, inputHypothesis, yTarget], cost, name="LSTM_cost_function", on_unused_input="warn"
            ),
        )
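
projectToCategories and computeCrossEntropyCost are also defined outside this listing. Judging from the simpler projection model in Examples #1 and #2, they plausibly look like the method sketches below; W_cat and b_cat are illustrative names for a category-projection matrix and bias, not identifiers taken from the source.

import theano.tensor as T

# Hypothetical method sketches (they would live on the same layer class):
def projectToCategories(self):
    # Project the final hidden state onto the category space and normalize
    # with a softmax. W_cat / b_cat are assumed names.
    return T.nnet.softmax(T.dot(self.finalOutputVal, self.W_cat) + self.b_cat)

def computeCrossEntropyCost(self, catOutput, yTarget):
    # Mean categorical cross-entropy over the batch, mirroring the cost term
    # in Examples #1 and #2.
    return T.nnet.categorical_crossentropy(catOutput, yTarget).mean()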
Example #4
def testRegularization():
    layer = HiddenLayer(2, 2, 2, "test", numCategories=3)

    premise = T.ftensor3("testP")
    hypothesis = T.ftensor3("testH")
    yTarget = T.fmatrix("testyTarget")

    hyp = np.array([[[0.5, 0.6]], [[0.3, 0.8]]], dtype=np.float32)
    prem = np.array([[[0.5, 0.6]], [[0.3, 0.8]]], dtype=np.float32)
    yTargetNP = np.array([[0., 1., 0.]], dtype=np.float32)

    layer.printLayerParams()
    cost, fn = layer.costFunc(premise, hypothesis, yTarget, "hypothesis", 0.0, 1)
    costValue = fn(prem, hyp, yTargetNP)
    print "Cost: ", costValue

    LSTMparams = [layer.params[cParam] for cParam in layer.LSTMcellParams]
    print "L2 norm all params: ", computeParamNorms(layer.params.values(), 0.5).eval()
    print "L2 norm LSTM cell params: ", computeParamNorms(LSTMparams, 0.5).eval()
Example #5
    def costFunc(self,
                 inputPremise,
                 inputHypothesis,
                 yTarget,
                 layer,
                 L2regularization,
                 dropoutRate,
                 premiseOutputs,
                 batchSize,
                 sentenceAttention=False,
                 wordwiseAttention=False,
                 numTimestepsHypothesis=1,
                 numTimestepsPremise=1):
        """
        Compute end-to-end cost function for a collection of input data.
        :param layer: whether we are doing a forward computation in the
                        premise or hypothesis layer
        :return: Symbolic expression for cost function as well as theano function
                 for computing cost expression.
        """
        if layer == "premise":
            _ = self.forwardRun(inputPremise, numTimestepsPremise)
        elif layer == "hypothesis":
            timestepOut, _ = self.forwardRun(inputHypothesis,
                                             numTimestepsHypothesis)

        # Apply sentence level attention -- notation consistent with paper
        if sentenceAttention:
            hstar = self.applySentenceAttention(premiseOutputs,
                                                self.finalOutputVal,
                                                numTimestepsPremise)
            self.finalOutputVal = hstar

        # Apply word by word attention
        if wordwiseAttention:
            hstar = self.applyWordwiseAttention(premiseOutputs, timestepOut[0],
                                                self.finalOutputVal, batchSize,
                                                numTimestepsPremise,
                                                numTimestepsHypothesis)
            self.finalOutputVal = hstar

        # Apply dropout to finalOutputVal before projecting to categories
        self.finalOutputVal = self.applyDropout(self.finalOutputVal,
                                                self.dropoutMode, dropoutRate)
        catOutput = self.projectToCategories()
        cost = self.computeCrossEntropyCost(catOutput, yTarget)

        # Get params specific to cell and add L2 regularization to cost
        LSTMparams = [self.params[cParam] for cParam in self.LSTMcellParams]
        cost = cost + computeParamNorms(LSTMparams, L2regularization)
        return cost, theano.function([inputPremise, inputHypothesis, yTarget],
                                     cost,
                                     name='LSTM_cost_function',
                                     on_unused_input="warn")