    def test_pyoptimizer_parameter(self):
        dev = D.Naive()
        Device.set_default(dev)
        pw1 = Parameter([8, 2], I.XavierUniform())
        self.t.add(pw1)
        self.assertIn("testadam-m1", pw1.stats)
        self.assertIn("testadam-m2", pw1.stats)
Example #2
def main():
    args = get_arguments()

    print("initializing device ... ", end="", file=sys.stderr, flush=True)
    dev = D.Naive() if args.gpu < 0 else D.CUDA(args.gpu)
    Device.set_default(dev)
    print("done.", file=sys.stderr)

    mode = args.mode
    prefix = args.model
    if mode == "train":
        encdec = EncoderDecoder(args.dropout)
        encdec.init(args.src_vocab, args.trg_vocab, args.embed, args.hidden)
        optimizer = O.Adam()
        optimizer.set_weight_decay(1e-6)
        optimizer.set_gradient_clipping(5)
        train(encdec, optimizer, args, 1e10)  # 1e10: initial (worst-case) validation perplexity
    elif mode == "resume":
        print("loading model/optimizer ... ",
              end="",
              file=sys.stderr,
              flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        optimizer = O.Adam()
        optimizer.load(prefix + ".optimizer")
        valid_ppl = load_ppl(prefix + ".valid_ppl")
        print("done.", file=sys.stderr)
        train(encdec, optimizer, args, valid_ppl)
    else:
        print("loading model ... ", end="", file=sys.stderr, flush=True)
        encdec = EncoderDecoder(args.dropout)
        encdec.load(prefix + ".model")
        print("done.", file=sys.stderr)
        test(encdec, args)
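
`load_ppl` is not defined here; a plausible implementation, mirroring the inline version in the Transformer example (#10) below, assumes the file stores a single float:

def load_ppl(path):
    # The *.valid_ppl file presumably holds one float: the best
    # validation perplexity seen so far.
    with open(path) as f:
        return float(f.read().strip())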
Example #3
    def primitiv_test1(self):
        dev = D.Naive()
        Device.set_default(dev)
        g = Graph()
        Graph.set_default(g)

        x = F.input(np.array([[1], [2], [3]]))
        y = 2 * x + 3
        return y.to_list()
Example #4
    def primitiv_test2(self):
        dev = D.Naive()
        Device.set_default(dev)
        g = Graph()
        Graph.set_default(g)

        x = F.input(np.array([[1], [2]]))
        a = F.input(np.array([[1, 2], [1, 2]]))
        y = F.matmul(a, x)
        return y.to_list()
Example #5
    def test_pyoptimizer_not_implemented(self):
        dev = D.Naive()
        Device.set_default(dev)
        optimizer = IncompleteOptimizer()
        p = Parameter()
        with self.assertRaises(NotImplementedError):
            optimizer.add(p)
        with self.assertRaises(NotImplementedError):
            optimizer.update()
        # Call the base-class hooks directly so that any subclass
        # overrides cannot intercept them.
        with self.assertRaises(NotImplementedError):
            Optimizer.get_configs(optimizer)
        with self.assertRaises(NotImplementedError):
            Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
                'Optimizer.clip_threshold': 0.0,
                'Optimizer.lr_scale': 1.0,
                'Optimizer.l2_strength': 0.0
            })
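
`IncompleteOptimizer` is not shown; to make every call raise `NotImplementedError`, it presumably subclasses `Optimizer` without overriding any of the required hooks:

class IncompleteOptimizer(Optimizer):
    # Deliberately empty: configure_parameter, update_parameter,
    # get_configs and set_configs all fall through to the base class,
    # which raises NotImplementedError.
    pass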
Example #6
def main():
    parser = ArgumentParser()
    parser.add_argument("mode")
    parser.add_argument("model_prefix")
    args = parser.parse_args()

    mode = args.mode
    prefix = args.model_prefix
    print("mode:", mode, file=sys.stderr)
    print("prefix:", prefix, file=sys.stderr)

    if mode not in ("train", "resume", "test"):
        print("unknown mode:", mode, file=sys.stderr)
        return

    print("initializing device ... ", end="", file=sys.stderr)
    sys.stderr.flush()

    dev = D.Naive()  # or D.CUDA(0) to run on a GPU
    Device.set_default(dev)

    print("done.", file=sys.stderr)

    if mode == "train":
        encdec = EncoderDecoder("encdec", SRC_VOCAB_SIZE, TRG_VOCAB_SIZE,
                                NUM_EMBED_UNITS, NUM_HIDDEN_UNITS,
                                DROPOUT_RATE)
        trainer = T.Adam()
        trainer.set_weight_decay(1e-6)
        trainer.set_gradient_clipping(5)
        train(encdec, trainer, prefix, 1e10)
    elif mode == "resume":
        print("loading model/trainer ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = EncoderDecoder.load("encdec", prefix + '.')
        trainer = T.Adam()
        trainer.load(prefix + ".trainer.config")
        valid_ppl = load_ppl(prefix + ".valid_ppl.config")
        print("done.", file=sys.stderr)
        train(encdec, trainer, prefix, valid_ppl)
    else:  # mode == "test"
        print("loading model ... ", end="", file=sys.stderr)
        sys.stderr.flush()
        encdec = EncoderDecoder.load("encdec", prefix + '.')
        print("done.", file=sys.stderr)
        test(encdec)
Example #7
    def test_pytrainer_not_implemented(self):
        dev = D.Naive()
        Device.set_default(dev)
        trainer = IncompleteTrainer()
        p = Parameter(Shape([]))
        with self.assertRaises(NotImplementedError):
            trainer.add_parameter(p)
        with self.assertRaises(NotImplementedError):
            trainer.update()
        with self.assertRaises(NotImplementedError):
            Trainer.get_configs(trainer)
        with self.assertRaises(NotImplementedError):
            Trainer.set_configs(trainer, {'Trainer.epoch': 1}, {
                'Trainer.clip_threshold': 0.0,
                'Trainer.lr_scale': 1.0,
                'Trainer.l2_strength': 0.0
            })
Example #8
def train_func(trainer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))

    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.input(input_data, Shape([2], 4))  # four 2-dimensional inputs in one batch
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        t = F.input(output_data, Shape([], 4))  # four scalar targets in one batch
        diff = t - y
        loss = F.batch.mean(diff * diff)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list()
    ]
Example #9
    def test_pyoptimizer_propagate_exception(self):
        dev = D.Naive()
        Device.set_default(dev)
        optimizer = ExceptionOptimizer()
        p = Parameter()
        with self.assertRaises(TestException) as ctx:
            optimizer.add(p)
        self.assertEqual(str(ctx.exception), "configure_parameter")
        with self.assertRaises(TestException) as ctx:
            optimizer.update()
        self.assertEqual(str(ctx.exception), "update_parameter")
        with self.assertRaises(TestException) as ctx:
            Optimizer.get_configs(optimizer)
        self.assertEqual(str(ctx.exception), "get_configs")
        with self.assertRaises(TestException) as ctx:
            Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
                'Optimizer.clip_threshold': 0.0,
                'Optimizer.lr_scale': 1.0,
                'Optimizer.l2_strength': 0.0
            })
        self.assertEqual(str(ctx.exception), "set_configs")
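
`ExceptionOptimizer` and `TestException` are defined elsewhere, but the assertions pin down their shape: each overridden hook raises a `TestException` carrying its own name, letting the test verify that exceptions from Python callbacks propagate through the optimizer core. A sketch consistent with that (the hook signatures are assumptions):

class TestException(Exception):
    pass


class ExceptionOptimizer(Optimizer):
    def configure_parameter(self, param):
        raise TestException("configure_parameter")

    def update_parameter(self, scale, param):
        raise TestException("update_parameter")

    def get_configs(self):
        raise TestException("get_configs")

    def set_configs(self, uint_configs, float_configs):
        raise TestException("set_configs")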
Example #10
def main(config):
    mode = config['mode']
    if mode == 'preproc':
        preproc(config)
        return

    print('initializing device ...', end='', file=sys.stderr, flush=True)
    dev = D.Naive() if config['gpu'] < 0 else D.CUDA(config['gpu'])
    Device.set_default(dev)
    print("done.", file=sys.stderr, flush=True)

    prefix = config['model_prefix']
    if mode == 'train':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.init(config['vocabulary_size'], config['d_model'],
                   config['d_ff'])
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        train(model, optimizer, config, 1e10)
    elif mode == 'resume':
        print('loading model/optimizer ... ',
              end='',
              file=sys.stderr,
              flush=True)
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        optimizer = O.Adam(alpha=1, beta2=0.98, eps=1e-9)
        optimizer.set_gradient_clipping(5)
        optimizer.load(prefix + '.optimizer')
        with Path(prefix).with_suffix('.valid').open() as f:
            valid_ppl = float(f.read().strip())
        print('done.', file=sys.stderr, flush=True)
        train(model, optimizer, config, valid_ppl)
    elif mode == 'test':
        model = Transformer(config['n_heads'], config['n_stacks'],
                            config['dropout'], config['generation_limit'])
        model.load(prefix + '.model')
        test(model, config)
Example #11
    def setUp(self):
        self.dev = D.Naive()
        Device.set_default(self.dev)
Example #12
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),  # Sample 1
        np.array([1, -1], dtype=np.float32),  # Sample 2
        np.array([-1, 1], dtype=np.float32),  # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),  # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),  # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
Example #13
    def primitiv_xor_test(self):
        dev = D.Naive()
        Device.set_default(dev)
        g = Graph()
        Graph.set_default(g)

        input_data = [
            np.array([[1], [1]]),
            np.array([[-1], [1]]),
            np.array([[-1], [-1]]),
            np.array([[1], [-1]]),
        ]

        label_data = [
            np.array([1]),
            np.array([-1]),
            np.array([1]),
            np.array([-1]),
        ]

        N = 8
        pw = Parameter([1, N], I.XavierUniform())
        pb = Parameter([], I.Constant(0))
        pu = Parameter([N, 2], I.XavierUniform())
        pc = Parameter([N], I.Constant(0))
        # Resume from saved parameters when a complete set exists.
        param_files = [(pw, 'output/xor/pw.data'), (pb, 'output/xor/pb.data'),
                       (pu, 'output/xor/pu.data'), (pc, 'output/xor/pc.data')]
        if all(os.path.isfile(path) for _, path in param_files):
            for param, path in param_files:
                param.load(path)

        optimizer = O.SGD(0.01)
        optimizer.add(pw, pb, pu, pc)

        for epoch in range(1000):
            print(epoch, end=' ')

            g.clear()

            x = F.input(input_data)
            w = F.parameter(pw)
            b = F.parameter(pb)
            u = F.parameter(pu)
            c = F.parameter(pc)
            h = F.tanh(u @ x + c)
            y = F.tanh(w @ h + b)

            for val in y.to_list():
                print('{:+.6f},'.format(val), end=' ')

            loss = self.calc_loss(y, label_data)
            print('loss={:.6f}'.format(loss.to_float()))

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        pw.save('output/xor/pw.data')
        pb.save('output/xor/pb.data')
        pu.save('output/xor/pu.data')
        pc.save('output/xor/pc.data')

        return y.to_list()
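
`self.calc_loss` is not shown; judging from the squared-error pattern used in the other examples here, a plausible definition is a batched mean squared error:

    def calc_loss(self, y, label_data):
        # Batched mean squared error over the four samples.
        t = F.input(label_data)
        diff = t - y
        return F.batch.mean(diff * diff)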
Example #14
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        x = F.input(inputs)

        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)

        h = F.dropout(h, .5, train)

        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, False)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if v > maxval:
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Example #15
    def setUp(self):
        self.dev = D.Naive()
        Device.set_default(self.dev)
        self.p = Parameter([8], I.Constant(0))
        self.p.value.reset_by_vector([1, 2, 3, 4, 5, 6, 7, 8])
Example #16
    def setUp(self):
        self.dev = D.Naive()
        Device.set_default(self.dev)
        self.p = Parameter(init=np.array([1, 2, 3, 4, 5, 6, 7, 8]))