train_ds = nds.VarLenDataSet(train_embed, train_label)
dev_ds = nds.VarLenDataSet(dev_embed, dev_label)
test_ds = nds.VarLenDataSet(test_embed, test_label)

# Build Computation Graph
graph = ng.Graph(nl.LogLoss(), ns.Adam(eta=0.01))

# Word Embedding Matrix using Xavier
word_embedding = graph.param_of([len(word_dict), wv_dim])

# Weight vector
weight = graph.param_of([wv_dim, 1])

input_node = graph.input()
embed = nd.Embed(input_node, word_embedding)
average = nd.Average(embed)
dot = nd.Dot(average, weight)
sigmoid = nd.Sigmoid(dot)
graph.output(sigmoid)

epochs = 100
batch_size = 50


def train():
    loss_sum = 0.0
    batch_counter = 0
    for batch in train_ds.batches(batch_size):
        input_node.value = batch.data
        graph.expect(np.float64(batch.expect).reshape(-1, 1))
        loss, accuracy = graph.train()
        loss_sum += loss
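For reference, here is a minimal NumPy sketch of the forward computation this graph describes: look up embeddings for a batch of word ids, average them over the sequence, take the dot product with the weight vector, and squash with a sigmoid. The shapes, variable names, and random data below are illustrative assumptions, not part of the framework.

import numpy as np

vocab_size, wv_dim, seq_len, batch = 1000, 50, 12, 4
rng = np.random.default_rng(0)

word_embedding = rng.normal(size=(vocab_size, wv_dim))  # Embed parameter
weight = rng.normal(size=(wv_dim, 1))                   # Dot parameter

word_ids = rng.integers(0, vocab_size, size=(batch, seq_len))  # one input batch
embedded = word_embedding[word_ids]        # Embed:   (batch, seq_len, wv_dim)
averaged = embedded.mean(axis=1)           # Average: (batch, wv_dim)
logits = averaged @ weight                 # Dot:     (batch, 1)
probs = 1.0 / (1.0 + np.exp(-logits))      # Sigmoid: probability of the positive class
print(probs.shape)  # (4, 1)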
        self.smax = smax

    def compute(self):
        return np.einsum("bld,bl->bd", self.embed.value, self.smax.value)

    def updateGrad(self):
        self.embed.grad += np.einsum("bd,bl->bld", self.grad, self.smax.value)
        self.smax.grad += np.einsum("bd,bld->bl", self.grad, self.embed.value)


input_node = graph.input()
embed = nd.Embed(input_node, word_embedding)
mapped = EmbedMap(embed, attention_weight)
softmax = nd.SoftMax(mapped)
attention = Attention(embed, softmax)
dot = nd.Dot(attention, weight)
sigmoid = nd.Sigmoid(dot)
graph.output(sigmoid)

epochs = 100
batch_size = 50


def train():
    loss_sum = 0.0
    batch_counter = 0
    for batch in train_ds.batches(batch_size):
        input_node.value = batch.data
        graph.expect(np.float64(batch.expect).reshape(-1, 1))
        loss, accuracy = graph.train()
        loss_sum += loss
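To see what the einsum patterns in the attention node do, the toy check below compares the forward contraction against an explicit weighted sum and verifies that the two backward contractions produce gradients with the operands' shapes. The shapes and random data are assumptions made purely for illustration.

import numpy as np

batch, seq_len, wv_dim = 2, 5, 3
rng = np.random.default_rng(1)
embed = rng.normal(size=(batch, seq_len, wv_dim))    # embeddings, shape (b, l, d)
smax = rng.dirichlet(np.ones(seq_len), size=batch)   # attention weights (b, l), rows sum to 1

# Forward: attention-weighted sum over the sequence dimension.
out = np.einsum("bld,bl->bd", embed, smax)
explicit = (embed * smax[:, :, None]).sum(axis=1)
print(np.allclose(out, explicit))  # True

# Backward: upstream gradient (b, d) is routed back to each operand's shape.
grad_out = rng.normal(size=(batch, wv_dim))
grad_embed = np.einsum("bd,bl->bld", grad_out, smax)   # (b, l, d), matches embed
grad_smax = np.einsum("bd,bld->bl", grad_out, embed)   # (b, l), matches smax
print(grad_embed.shape, grad_smax.shape)  # (2, 5, 3) (2, 5)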