def set_up_attention(self):
    self.attention_has_been_set_up = True

    if not self.config.p.attention:
        return

    #print 'attention', len(self.attention_memory), len(self.attention_memory[0])
    # one node per input position, concatenating the hidden states of all layers
    prestack = [
        nn.ConcatNode([layer[i] for layer in self.attention_memory], self.g)
        for i in range(len(self.attention_memory[0]))
    ]
    #print prestack
    self.stacked_attention_memory = nn.StackNode(prestack, self.g)
    #print 'stacked_memory.shape()', self.stacked_attention_memory.shape()

    # to_alpha holds the vectors used to compute the attention weights:
    # either the full per-position state (all layers) or just the first layer
    if self.config.p.full_state_attention:
        prestack = [
            nn.ConcatNode([layer[i] for layer in self.attention_memory], self.g)
            for i in range(len(self.attention_memory[0]))
        ]
        self.to_alpha = nn.StackNode(prestack, self.g)
    else:
        prestack = self.attention_memory[0]
        self.to_alpha = nn.StackNode(prestack, self.g)
    #print len(self.attention_memory), len(self.attention_memory[0]), self.attention_memory[0][0].value.shape
    #print 'to_alpha shape', self.to_alpha.value.shape

    # transpose: to_alpha is now (length, r-ish)
    self.to_alpha = nn.TransposeInPlaceNode(self.to_alpha, self.g)

    # optionally apply the bilinear attention matrix B
    if self.config.p.matrix_attention:
        self.to_alpha = nn.DotNode(self.v.attention_B, self.to_alpha, self.g)
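# A minimal numpy sketch (illustration only, not part of the model) of the
# shape bookkeeping that set_up_attention performs with nn.StackNode,
# nn.TransposeInPlaceNode and nn.DotNode: per-position vectors are stacked
# into a matrix and, with matrix_attention, multiplied by a bilinear matrix.
# The names `length`, `hidden_size` and `B` are hypothetical, and the exact
# axis ordering of the graph nodes may differ from this sketch.
def _attention_shapes_sketch(length=5, hidden_size=8):
    import numpy as np
    per_position = [np.random.randn(hidden_size) for _ in range(length)]
    to_alpha = np.stack(per_position)              # (length, hidden_size)
    B = np.random.randn(hidden_size, hidden_size)  # bilinear attention matrix
    keys = to_alpha.dot(B)                         # (length, hidden_size)
    # an attention query q of size hidden_size would then score as keys.dot(q)
    return keys.shape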
def __init__(self, variables, config, proof_step, train=False):
    ''' This is the model.  In a single pass it processes the inputs,
    computes the losses, and runs a training step if train is set. '''
    DefaultModel.__init__(self, config, variables, train=train)

    # fix the random seed during evaluation so the sampled propositions
    # are deterministic for a given proof step
    if not self.train:
        np.random.seed(proof_step.context.number +
                       proof_step.prop.number + proof_step.tree.size())

    # vector for the goal and its context, projected by W
    main = self.main_get_vector(proof_step.tree, proof_step.context.hyps,
                                proof_step.context.f)
    main = nn.DotNode(main, self.v.W, self.g)

    # get a list [right prop, wrong prop 0, ..., wrong prop n]
    props = self.get_props(proof_step)
    ###DEBUG
    #if not self.train: print [p.label for p in props]
    ###DEBUG

    # vector for each candidate proposition
    out_vectors = [self.prop_get_vector(prop.tree, prop.hyps, prop.f)
                   for prop in props]
    stacked = nn.StackNode(out_vectors, self.g)
    stacked = nn.TransposeInPlaceNode(stacked, self.g)

    # score every proposition against the goal; the correct one is at index 0
    logits = nn.DotNode(main, stacked, self.g)
    cross_entropy = nn.SoftmaxCrossEntropyLoss(0, logits, self.g)
    self.loss = nn.AddNode([self.loss, cross_entropy], self.g)

    accuracy = 1 * (np.argmax(logits.value) == 0)
    self.outputs = [cross_entropy.value, accuracy, 1.0 / len(props)]
    self.output_counts = [1, 1, 1]

    # perform the backpropagation if we are training
    if train:
        self.g.backprop(self.loss)
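# A minimal numpy sketch (illustration only) of the ranking objective built in
# __init__ above: the correct proposition sits at index 0 of `props`, so the
# softmax cross-entropy target is 0 and accuracy is whether the argmax of the
# logits lands on index 0. The function and argument names are hypothetical.
def _ranking_loss_sketch(main_vec, prop_vecs):
    import numpy as np
    stacked = np.stack(prop_vecs)          # (num_props, dim)
    logits = stacked.dot(main_vec)         # one score per proposition
    shifted = logits - logits.max()        # numerically stable softmax
    log_probs = shifted - np.log(np.exp(shifted).sum())
    cross_entropy = -log_probs[0]          # target label is index 0
    accuracy = 1 * (np.argmax(logits) == 0)
    return cross_entropy, accuracy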