Example #1
    def set_up_attention(self):
        # record that set-up has run, then bail out early when the
        # attention mechanism is disabled in the config
        self.attention_has_been_set_up = True
        if not self.config.p.attention:
            return

        # attention_memory is a list of layers, each a list of per-timestep
        # state nodes; concatenate across layers at each timestep

        prestack = [
            nn.ConcatNode([layer[i]
                           for layer in self.attention_memory], self.g)
            for i in range(len(self.attention_memory[0]))
        ]
        self.stacked_attention_memory = nn.StackNode(prestack, self.g)

        if self.config.p.full_state_attention:
            # attend over the full concatenated state from every layer
            prestack = [
                nn.ConcatNode([layer[i]
                               for layer in self.attention_memory], self.g)
                for i in range(len(self.attention_memory[0]))
            ]
            self.to_alpha = nn.StackNode(prestack, self.g)
        else:
            # attend over the first layer's states only
            prestack = self.attention_memory[0]
            self.to_alpha = nn.StackNode(prestack, self.g)

        # transpose so that to_alpha has shape (length, d), where d is the
        # state size: r, or a multiple of r under full-state attention
        self.to_alpha = nn.TransposeInPlaceNode(self.to_alpha, self.g)

        if self.config.p.matrix_attention:
            # optionally project through the learned attention matrix B
            self.to_alpha = nn.DotNode(self.v.attention_B, self.to_alpha,
                                       self.g)
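
The nn.* nodes above come from the project's own computation-graph library, so the shape bookkeeping is easiest to see in plain NumPy. The following sketch mirrors the full-state branch under two assumptions: ConcatNode joins vectors end-to-end, and StackNode stacks its inputs as columns (which is what makes the post-transpose shape "(length, d)" work out). All dimensions are illustrative, not taken from the source.

import numpy as np

num_layers, length, r = 2, 5, 8  # assumed toy dimensions

# attention_memory[layer][t]: that layer's state vector at timestep t
attention_memory = [[np.random.randn(r) for _ in range(length)]
                    for _ in range(num_layers)]

# ConcatNode across layers at each timestep -> vectors of size num_layers * r
prestack = [np.concatenate([layer[t] for layer in attention_memory])
            for t in range(length)]

# StackNode -> (num_layers * r, length); the transpose -> (length, num_layers * r)
stacked = np.stack(prestack, axis=1)
to_alpha = stacked.T

print(stacked.shape)   # (16, 5)
print(to_alpha.shape)  # (5, 16)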
Example #2
    def __init__(self, variables, config, proof_step, train=False):
        ''' This is the model.  In a single forward pass it processes
        the inputs and computes the losses, and it runs a training
        step if train is True.
        '''
        DefaultModel.__init__(self, config, variables, train=train)

        # fix the random seed so that evaluation runs are deterministic
        if not self.train:
            np.random.seed(proof_step.context.number +
                           proof_step.prop.number + proof_step.tree.size())

        main = self.main_get_vector(proof_step.tree, proof_step.context.hyps,
                                    proof_step.context.f)

        main = nn.DotNode(main, self.v.W, self.g)

        # get a list [right prop, wrong prop 0, ..., wrong prop n]
        props = self.get_props(proof_step)

        out_vectors = [
            self.prop_get_vector(prop.tree, prop.hyps, prop.f)
            for prop in props
        ]
        # stack the candidate vectors into a matrix and score each
        # candidate against the main vector
        stacked = nn.StackNode(out_vectors, self.g)
        stacked = nn.TransposeInPlaceNode(stacked, self.g)

        logits = nn.DotNode(main, stacked, self.g)
        # the correct proposition sits at index 0
        cross_entropy = nn.SoftmaxCrossEntropyLoss(0, logits, self.g)
        self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

        accuracy = 1 * (np.argmax(logits.value) == 0)  # right prop ranked first?
        # outputs: loss, accuracy, and the accuracy of a uniform random guess
        self.outputs = [cross_entropy.value, accuracy, 1.0 / len(props)]
        self.output_counts = [1, 1, 1]

        # perform the backpropagation if we are training
        if train:
            self.g.backprop(self.loss)
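
The constructor treats premise selection as classification: the right proposition is placed at index 0, the logits are its dot-product scores against each candidate, and SoftmaxCrossEntropyLoss(0, ...) penalizes the model when index 0 does not win. Here is a minimal NumPy sketch of the reported numbers, assuming the loss node computes a standard softmax cross-entropy with the true class given by its first argument (consistent with the accuracy check against index 0); the logit values are made up for illustration.

import numpy as np

def softmax_cross_entropy(logits, true_index):
    # numerically stable log-softmax, then pick out the true class
    shifted = logits - np.max(logits)
    log_probs = shifted - np.log(np.sum(np.exp(shifted)))
    return -log_probs[true_index]

# illustrative scores: right prop at index 0, three wrong props after it
logits = np.array([2.1, 0.3, -0.5, 1.0])

loss = softmax_cross_entropy(logits, 0)
accuracy = 1 * (np.argmax(logits) == 0)  # 1 when the right prop ranks first
baseline = 1.0 / len(logits)             # accuracy of a uniform random guess

print(loss, accuracy, baseline)          # mirrors self.outputs above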