def test_pyoptimizer_parameter(self):
    dev = D.Naive()
    Device.set_default(dev)
    pw1 = Parameter([8, 2], I.XavierUniform())
    self.t.add(pw1)
    self.assertIn("testadam-m1", pw1.stats)
    self.assertIn("testadam-m2", pw1.stats)
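# The "testadam-*" stats checked above come from the custom optimizer held in
# self.t. Below is a minimal sketch of what such an optimizer might look like;
# the class name, the use of Parameter.add_stats, and the hook body are
# assumptions for illustration, not the test suite's actual definition.
class TestAdam(Optimizer):
    def configure_parameter(self, param):
        # Allocates the moment buffers whose names the test asserts on.
        if "testadam-m1" not in param.stats:
            param.add_stats("testadam-m1", param.shape())
        if "testadam-m2" not in param.stats:
            param.add_stats("testadam-m2", param.shape())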
def main():
    args = get_arguments()

    print("initializing device ... ", end="", file=sys.stderr, flush=True)
    dev = D.Naive() if args.gpu < 0 else D.CUDA(args.gpu)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    mode = args.mode
    prefix = args.model
    if mode == "train":
        encdec = EncoderDecoder(args.dropout)
        encdec.init(args.src_vocab, args.trg_vocab, args.embed, args.hidden)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        train(encdec, optimizer, args, 1e10)
    elif mode == "resume":
        print("loading model/optimizer ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, args, valid_ppl)
    else:
        print("loading model ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec, args)
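# A minimal sketch of the load_ppl helper used in the "resume" branches above
# and further below (an assumption: the ".valid_ppl" file stores a single
# perplexity value as plain text, mirroring how the Transformer example reads
# its ".valid" file).
def load_ppl(path):
    with open(path) as f:
        return float(f.read().strip())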
def primitiv_test1(self):
    dev = D.Naive()
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)
    x = F.input(np.array([[1], [2], [3]]))
    y = 2 * x + 3
    return y.to_list()
def primitiv_test2(self):
    dev = D.Naive()
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)
    x = F.input(np.array([[1], [2]]))
    a = F.input(np.array([[1, 2], [1, 2]]))
    y = F.matmul(a, x)
    return y.to_list()
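# A hypothetical assertion pairing for the two smoke tests above, with the
# expected values derived by hand: 2 * [1, 2, 3] + 3 = [5, 7, 9], and
# [[1, 2], [1, 2]] @ [1, 2]^T = [5, 5].
def test_primitiv_basics(self):
    self.assertEqual(self.primitiv_test1(), [5.0, 7.0, 9.0])
    self.assertEqual(self.primitiv_test2(), [5.0, 5.0])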
def test_pyoptimizer_not_implemented(self):
    dev = D.Naive()
    Device.set_default(dev)
    optimizer = IncompleteOptimizer()
    p = Parameter()
    with self.assertRaises(NotImplementedError):
        optimizer.add(p)
    with self.assertRaises(NotImplementedError):
        optimizer.update()
    with self.assertRaises(NotImplementedError):
        Optimizer.get_configs(optimizer)
    with self.assertRaises(NotImplementedError):
        Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
            'Optimizer.clip_threshold': 0.0,
            'Optimizer.lr_scale': 1.0,
            'Optimizer.l2_strength': 0.0,
        })
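# A plausible definition of the IncompleteOptimizer used above (an assumption;
# the real one is defined elsewhere in the test module): an Optimizer subclass
# that overrides none of the required hooks, so configure_parameter,
# update_parameter, get_configs, and set_configs all fall through to the base
# class and raise NotImplementedError.
class IncompleteOptimizer(Optimizer):
    pass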
def main():
    parser = ArgumentParser()
    parser.add_argument("mode")
    parser.add_argument("model_prefix")
    args = parser.parse_args()

    mode = args.mode
    prefix = args.model_prefix
    print("mode:", mode, file=sys.stderr)
    print("prefix:", prefix, file=sys.stderr)
    if mode not in ("train", "resume", "test"):
        print("unknown mode:", mode, file=sys.stderr)
        return

    print("initializing device ... ", end="", file=sys.stderr)
    sys.stderr.flush()
    dev = D.Naive()  # = D.CUDA(0)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    if mode == "train":
        encdec = EncoderDecoder("encdec", SRC_VOCAB_SIZE, TRG_VOCAB_SIZE,
                                NUM_EMBED_UNITS, NUM_HIDDEN_UNITS, DROPOUT_RATE)
        trainer = T.Adam()
        trainer.set_weight_decay(1e-6)
        trainer.set_gradient_clipping(5)
        train(encdec, trainer, prefix, 1e10)
    elif mode == "resume":
        print("loading model/trainer ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = EncoderDecoder.load("encdec", prefix + '.')
        trainer = T.Adam()
        trainer.load(prefix + ".trainer.config")
        valid_ppl = load_ppl(prefix + ".valid_ppl.config")
        print("done.", file=sys.stderr)
        train(encdec, trainer, prefix, valid_ppl)
    else:  # mode == "test"
        print("loading model ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = EncoderDecoder.load("encdec", prefix + '.')
        print("done.", file=sys.stderr)
        test(encdec)
def test_pytrainer_not_implemented(self):
    dev = D.Naive()
    Device.set_default(dev)
    trainer = IncompleteTrainer()
    p = Parameter(Shape([]))
    with self.assertRaises(NotImplementedError):
        trainer.add_parameter(p)
    with self.assertRaises(NotImplementedError):
        trainer.update()
    with self.assertRaises(NotImplementedError):
        Trainer.get_configs(trainer)
    with self.assertRaises(NotImplementedError):
        Trainer.set_configs(trainer, {'Trainer.epoch': 1}, {
            'Trainer.clip_threshold': 0.0,
            'Trainer.lr_scale': 1.0,
            'Trainer.l2_strength': 0.0,
        })
def train_func(trainer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))
    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.input(input_data, Shape([2], 4))
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2
        t = F.input(output_data, Shape([], 4))
        diff = t - y
        loss = F.batch.mean(diff * diff)
        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list(),
    ]
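# Hypothetical usage of train_func (assuming a T.SGD trainer is available
# alongside the T.Adam used elsewhere): because the device is seeded with a
# fixed value (12345), two runs with equivalently configured trainers should
# produce identical parameter values.
values_a = train_func(T.SGD(0.1))
values_b = train_func(T.SGD(0.1))
assert values_a == values_b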
def test_pyoptimizer_propagate_exception(self):
    dev = D.Naive()
    Device.set_default(dev)
    optimizer = ExceptionOptimizer()
    p = Parameter()
    with self.assertRaises(TestException) as ctx:
        optimizer.add(p)
    self.assertEqual(str(ctx.exception), "configure_parameter")
    with self.assertRaises(TestException) as ctx:
        optimizer.update()
    self.assertEqual(str(ctx.exception), "update_parameter")
    with self.assertRaises(TestException) as ctx:
        Optimizer.get_configs(optimizer)
    self.assertEqual(str(ctx.exception), "get_configs")
    with self.assertRaises(TestException) as ctx:
        Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
            'Optimizer.clip_threshold': 0.0,
            'Optimizer.lr_scale': 1.0,
            'Optimizer.l2_strength': 0.0,
        })
    self.assertEqual(str(ctx.exception), "set_configs")
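# A minimal sketch of ExceptionOptimizer and TestException (assumptions; the
# actual definitions and hook signatures live elsewhere in the test module).
# Each overridden hook raises TestException with the message the test asserts
# on, which is how the test verifies that Python exceptions propagate back
# through the optimizer core.
class TestException(Exception):
    pass


class ExceptionOptimizer(Optimizer):
    def configure_parameter(self, param):
        raise TestException("configure_parameter")

    def update_parameter(self, scale, param):
        raise TestException("update_parameter")

    def get_configs(self):
        raise TestException("get_configs")

    def set_configs(self, uint_configs, float_configs):
        raise TestException("set_configs")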
def main(config):
    mode = config['mode']
    if mode == 'preproc':
        preproc(config)
        return

    print('initializing device ...', end='', file=sys.stderr, flush=True)
    dev = D.Naive() if config['gpu'] < 0 else D.CUDA(config['gpu'])
    Device.set_default(dev)
    print("done.", file=sys.stderr, flush=True)

    prefix = config['model_prefix']
    if mode == 'train':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.init(config['vocabulary_size'], config['d_model'], config['d_ff'])
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        train(model, optimizer, config, 1e10)
    elif mode == 'resume':
        print('loading model/optimizer ... ', end='', file=sys.stderr, flush=True)
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        optimizer.load(prefix + '.optimizer')
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        test(model, config)
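# A hypothetical config dict accepted by main() above. The key names are taken
# from the lookups in the function; the values are illustrative only.
config = {
    'mode': 'train',
    'gpu': -1,
    'model_prefix': 'transformer',
    'n_heads': 8,
    'n_stacks': 6,
    'dropout': 0.1,
    'generation_limit': 64,
    'vocabulary_size': 16000,
    'd_model': 512,
    'd_ff': 2048,
}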
def setUp(self):
    self.dev = D.Naive()
    Device.set_default(self.dev)
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),    # Sample 1
        np.array([1, -1], dtype=np.float32),   # Sample 2
        np.array([-1, 1], dtype=np.float32),   # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),   # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),   # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
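# A possible extension of the loop above (an assumption, mirroring the
# save/load pattern of the XOR test below): persist the trained parameters
# after the final update so a later run can resume from them.
pw1.save("output/pw1.data")
pb1.save("output/pb1.data")
pw2.save("output/pw2.data")
pb2.save("output/pb2.data")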
def primitiv_xor_test(self):
    dev = D.Naive()
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    input_data = [
        np.array([[1], [1]]),
        np.array([[-1], [1]]),
        np.array([[-1], [-1]]),
        np.array([[1], [-1]]),
    ]
    label_data = [
        np.array([1]),
        np.array([-1]),
        np.array([1]),
        np.array([-1]),
    ]

    N = 8
    pw = Parameter([1, N], I.XavierUniform())
    pb = Parameter([], I.Constant(0))
    pu = Parameter([N, 2], I.XavierUniform())
    pc = Parameter([N], I.Constant(0))

    # Resumes from saved parameters if a complete checkpoint exists.
    if (os.path.isfile('output/xor/pw.data')
            and os.path.isfile('output/xor/pb.data')
            and os.path.isfile('output/xor/pu.data')
            and os.path.isfile('output/xor/pc.data')):
        pw.load('output/xor/pw.data')
        pb.load('output/xor/pb.data')
        pu.load('output/xor/pu.data')
        pc.load('output/xor/pc.data')

    optimizer = O.SGD(0.01)
    optimizer.add(pw, pb, pu, pc)

    for epoch in range(1000):
        print(epoch, end=' ')
        g.clear()
        x = F.input(input_data)
        w = F.parameter(pw)
        b = F.parameter(pb)
        u = F.parameter(pu)
        c = F.parameter(pc)
        h = F.tanh(u @ x + c)
        y = F.tanh(w @ h + b)
        for val in y.to_list():
            print('{:+.6f},'.format(val), end=' ')
        loss = self.calc_loss(y, label_data)
        print('loss={:.6f}'.format(loss.to_float()))
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()

    pw.save('output/xor/pw.data')
    pb.save('output/xor/pb.data')
    pu.save('output/xor/pu.data')
    pc.save('output/xor/pc.data')
    return y.to_list()
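# A plausible definition of the calc_loss helper used above (an assumption;
# the actual helper is defined elsewhere in the test class): mean squared
# error between the batched predictions and labels, matching the loss used in
# the other examples.
def calc_loss(self, y, label_data):
    t = F.input(label_data)
    diff = t - y
    return F.batch.mean(diff * diff)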
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        x = F.input(inputs)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)
        h = F.dropout(h, .5, train)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]]
                      for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]]
                      for i in range(BATCH_SIZE)]

            g.clear()
            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i]
                      for i in range(BATCH_SIZE)]

            g.clear()
            y = make_graph(inputs, False)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if v > maxval:
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
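# Minimal sketches of the load_images/load_labels helpers referenced above.
# These assume the standard MNIST IDX layout (16-byte image header, 8-byte
# label header) and pixels rescaled to [0, 1]; the real helpers may differ.
def load_images(path, num_samples):
    with open(path, "rb") as f:
        f.read(16)  # skip magic number, sample count, rows, cols
        raw = np.frombuffer(f.read(num_samples * 28 * 28), dtype=np.uint8)
    return (raw.astype(np.float32) / 255.0).reshape(num_samples, 28 * 28)


def load_labels(path, num_samples):
    with open(path, "rb") as f:
        f.read(8)  # skip magic number and sample count
        return [int(v) for v in np.frombuffer(f.read(num_samples), dtype=np.uint8)]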
def setUp(self):
    self.dev = D.Naive()
    Device.set_default(self.dev)
    self.p = Parameter([8], I.Constant(0))
    self.p.value.reset_by_vector([1, 2, 3, 4, 5, 6, 7, 8])
def setUp(self):
    self.dev = D.Naive()
    Device.set_default(self.dev)
    self.p = Parameter(init=np.array([1, 2, 3, 4, 5, 6, 7, 8]))