def forward(self, x):
    # The four gate pre-activations are packed into u and separated by
    # slicing along axis 0: input (i), forget (f), output (o), candidate (j).
    u = self.wxh @ x + self.whh @ self.h + self.bh
    i = F.sigmoid(F.slice(u, 0, 0, self.out_size))
    f = F.sigmoid(F.slice(u, 0, self.out_size, 2 * self.out_size))
    o = F.sigmoid(F.slice(u, 0, 2 * self.out_size, 3 * self.out_size))
    j = F.tanh(F.slice(u, 0, 3 * self.out_size, 4 * self.out_size))
    self.c = i * j + f * self.c
    self.h = o * F.tanh(self.c)
    return self.h
def forward(self, x):
    u = self.wxh_ @ x + self.whh_ @ self.h_ + self.bh_
    i = F.sigmoid(F.slice(u, 0, 0, self.out_size_))
    f = F.sigmoid(F.slice(u, 0, self.out_size_, 2 * self.out_size_))
    o = F.sigmoid(F.slice(u, 0, 2 * self.out_size_, 3 * self.out_size_))
    j = F.tanh(F.slice(u, 0, 3 * self.out_size_, 4 * self.out_size_))
    self.c_ = i * j + f * self.c_
    self.h_ = o * F.tanh(self.c_)
    return self.h_
def forward(self, x): """One step forwarding.""" out_size = self.pwhh.shape()[1] u = self.wxh @ x + self.whh @ self.h + self.bh i = F.sigmoid(F.slice(u, 0, 0, out_size)) f = F.sigmoid(F.slice(u, 0, out_size, 2 * out_size)) o = F.sigmoid(F.slice(u, 0, 2 * out_size, 3 * out_size)) j = F.tanh(F.slice(u, 0, 3 * out_size, 4 * out_size)) self.c = i * j + f * self.c self.h = o * F.tanh(self.c) return self.h
def main():
    with DefaultScopeDevice(CPUDevice()):
        pw1 = Parameter("w1", [8, 2], I.XavierUniform())
        pb1 = Parameter("b1", [8], I.Constant(0))
        pw2 = Parameter("w2", [1, 8], I.XavierUniform())
        pb2 = Parameter("b2", [], I.Constant(0))

        trainer = T.SGD(0.1)
        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)

        input_data = np.array(
            [
                [1, 1],    # Sample 1
                [1, -1],   # Sample 2
                [-1, 1],   # Sample 3
                [-1, -1],  # Sample 4
            ], dtype=np.float32)
        output_data = np.array(
            [
                1,   # Label 1
                -1,  # Label 2
                -1,  # Label 3
                1,   # Label 4
            ], dtype=np.float32)

        for i in range(100):
            g = Graph()
            with DefaultScopeGraph(g):
                # Builds a computation graph.
                #x = F.input(shape=Shape([2], 4), data=input_data)
                x = F.input(data=input_data)
                w1 = F.input(param=pw1)
                b1 = F.input(param=pb1)
                w2 = F.input(param=pw2)
                b2 = F.input(param=pb2)
                h = F.tanh(F.matmul(w1, x) + b1)
                y = F.matmul(w2, h) + b2

                # Calculates values.
                y_val = g.forward(y).to_list()
                print("epoch ", i, ":")
                for j in range(4):
                    print(" [", j, "]: ", y_val[j])

                #t = F.input(shape=Shape([], 4), data=output_data)
                t = F.input(data=output_data)
                diff = t - y
                loss = F.batch.mean(diff * diff)
                loss_val = g.forward(loss).to_list()[0]
                print(" loss: ", loss_val)

                trainer.reset_gradients()
                g.backward(loss)
                trainer.update()
def decode_step(self, trg_words, train):
    e = F.pick(self.trg_lookup_, trg_words, 1)
    e = F.dropout(e, self.dropout_rate_, train)
    h = self.trg_lstm_.forward(F.concat([e, self.feed_], 0))
    h = F.dropout(h, self.dropout_rate_, train)
    atten_probs = F.softmax(self.t_concat_fb_ @ h, 0)
    c = self.concat_fb_ @ atten_probs
    self.feed_ = F.tanh(self.whj_ @ F.concat([h, c], 0) + self.bj_)
    return self.wjy_ @ self.feed_ + self.by_
def decode_step(self, trg_words, train): """One step decoding.""" e = F.pick(self.trg_lookup, trg_words, 1) e = F.dropout(e, self.dropout_rate, train) h = self.trg_lstm.forward(F.concat([e, self.feed], 0)) h = F.dropout(h, self.dropout_rate, train) atten_probs = F.softmax(self.t_concat_fb @ h, 0) c = self.concat_fb @ atten_probs self.feed = F.tanh(self.whj @ F.concat([h, c], 0) + self.bj) return self.wjy @ self.feed + self.by
def train_func(trainer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))

    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()

        x = F.input(input_data, Shape([2], 4))
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        t = F.input(output_data, Shape([], 4))
        diff = t - y
        loss = F.batch.mean(diff * diff)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list()
    ]
def forward(self, xs):
    # Projects all time steps at once; the candidate (j), forget gate (f),
    # and reset gate (r) blocks are separated by slicing along axis 0.
    x = F.concat(xs, 1)
    u = self.w_ @ x
    j = F.slice(u, 0, 0, self.out_size_)
    f = F.sigmoid(
        F.slice(u, 0, self.out_size_, 2 * self.out_size_)
        + F.broadcast(self.bf_, 1, len(xs)))
    r = F.sigmoid(
        F.slice(u, 0, 2 * self.out_size_, 3 * self.out_size_)
        + F.broadcast(self.br_, 1, len(xs)))  # reset-gate bias (assumed attribute br_)
    c = F.zeros([self.out_size_])
    hs = []
    for i in range(len(xs)):
        ji = F.slice(j, 1, i, i + 1)
        fi = F.slice(f, 1, i, i + 1)
        ri = F.slice(r, 1, i, i + 1)
        c = fi * c + (1 - fi) * ji
        hs.append(ri * F.tanh(c) + (1 - ri) * xs[i])
    return hs
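For reference (not part of the original snippet), the recurrence the loop above implements is an SRU-style cell with a highway connection to the input, writing $d$ for out_size_ and $t$ for the position within xs:

$$j_t = (W x_t)_{[0,d)}, \qquad f_t = \sigma\big((W x_t)_{[d,2d)} + b_f\big), \qquad r_t = \sigma\big((W x_t)_{[2d,3d)} + b_r\big),$$
$$c_t = f_t \odot c_{t-1} + (1 - f_t) \odot j_t, \qquad h_t = r_t \odot \tanh(c_t) + (1 - r_t) \odot x_t.$$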
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),    # Sample 1
        np.array([1, -1], dtype=np.float32),   # Sample 2
        np.array([-1, 1], dtype=np.float32),   # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),   # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),   # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print(" [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print(" loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
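# The main() above appears to target the primitiv Python bindings; a likely
# set of imports for running it as a standalone script (an assumption based
# on the names it uses: np, D, F, I, O, Device, Graph, Parameter).
import numpy as np

from primitiv import Device, Graph, Parameter
from primitiv import devices as D
from primitiv import functions as F
from primitiv import initializers as I
from primitiv import optimizers as O


if __name__ == "__main__":
    main()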