示例#1
0
 def test_pytrainer_not_implemented(self):
     """Every trainer operation on IncompleteTrainer must raise.

     Each call below is expected to raise ``NotImplementedError``.
     """
     # Keep the device bound to a local for the duration of the test.
     dev = D.Naive()
     Device.set_default(dev)
     trainer = IncompleteTrainer()
     p = Parameter(Shape([]))
     unimplemented_calls = (
         lambda: trainer.add_parameter(p),
         lambda: trainer.update(),
         lambda: Trainer.get_configs(trainer),
         lambda: Trainer.set_configs(trainer, {'Trainer.epoch': 1}, {
             'Trainer.clip_threshold': 0.0,
             'Trainer.lr_scale': 1.0,
             'Trainer.l2_strength': 0.0
         }),
     )
     for call in unimplemented_calls:
         with self.assertRaises(NotImplementedError):
             call()
 def test_pyoptimizer_not_implemented(self):
     """Every optimizer operation on IncompleteOptimizer must raise.

     Each call below is expected to raise ``NotImplementedError``.
     """
     # Keep the device bound to a local for the duration of the test.
     dev = D.Naive()
     Device.set_default(dev)
     optimizer = IncompleteOptimizer()
     p = Parameter()
     unimplemented_calls = (
         lambda: optimizer.add(p),
         lambda: optimizer.update(),
         lambda: Optimizer.get_configs(optimizer),
         lambda: Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
             'Optimizer.clip_threshold': 0.0,
             'Optimizer.lr_scale': 1.0,
             'Optimizer.l2_strength': 0.0
         }),
     )
     for call in unimplemented_calls:
         with self.assertRaises(NotImplementedError):
             call()
示例#3
0
class LSTM(Model):
    """LSTM cell whose three parameters are registered via scan_attributes()."""

    def __init__(self):
        # Parameters start empty; init() gives them shapes and initializers.
        self._pwxh = Parameter()
        self._pwhh = Parameter()
        self._pbh = Parameter()
        self.scan_attributes()

    def init(self, in_size, out_size):
        """Initializes the three parameters for the given sizes."""
        gate_rows = 4 * out_size
        self._pwxh.init([gate_rows, in_size], I.XavierUniform())
        self._pwhh.init([gate_rows, out_size], I.XavierUniform())
        self._pbh.init([gate_rows], I.Constant(0))

    def reset(self, init_c=Node(), init_h=Node()):
        """Loads parameters as nodes and sets the starting cell/hidden states.

        A state argument is used only when its ``valid()`` returns true;
        otherwise the state is set to ``F.zeros([out_size])``.
        """
        out_size = self._pwhh.shape()[1]
        self._wxh = F.parameter(self._pwxh)
        self._whh = F.parameter(self._pwhh)
        self._bh = F.parameter(self._pbh)
        if init_c.valid():
            self._c = init_c
        else:
            self._c = F.zeros([out_size])
        if init_h.valid():
            self._h = init_h
        else:
            self._h = F.zeros([out_size])

    def forward(self, x):
        """Advances the cell by one step and returns the new hidden value."""
        out_size = self._pwhh.shape()[1]
        # Boundaries of the four consecutive out_size-wide slices of u.
        bounds = [k * out_size for k in range(5)]
        u = self._wxh @ x + self._whh @ self._h + self._bh
        i = F.sigmoid(F.slice(u, 0, bounds[0], bounds[1]))
        f = F.sigmoid(F.slice(u, 0, bounds[1], bounds[2]))
        o = F.sigmoid(F.slice(u, 0, bounds[2], bounds[3]))
        j = F.tanh(F.slice(u, 0, bounds[3], bounds[4]))
        self._c = i * j + f * self._c
        self._h = o * F.tanh(self._c)
        return self._h

    def get_c(self):
        """Returns the current cell state."""
        return self._c

    def get_h(self):
        """Returns the current hidden value."""
        return self._h
 def test_pyoptimizer_propagate_exception(self):
     """TestException from ExceptionOptimizer must reach the caller.

     Each call is paired with the exception message it is expected to
     surface.
     """
     # Keep the device bound to a local for the duration of the test.
     dev = D.Naive()
     Device.set_default(dev)
     optimizer = ExceptionOptimizer()
     p = Parameter()
     expectations = (
         (lambda: optimizer.add(p), "configure_parameter"),
         (lambda: optimizer.update(), "update_parameter"),
         (lambda: Optimizer.get_configs(optimizer), "get_configs"),
         (lambda: Optimizer.set_configs(optimizer, {'Optimizer.epoch': 1}, {
             'Optimizer.clip_threshold': 0.0,
             'Optimizer.lr_scale': 1.0,
             'Optimizer.l2_strength': 0.0
         }), "set_configs"),
     )
     for invoke, message in expectations:
         with self.assertRaises(TestException) as ctx:
             invoke()
         self.assertEqual(str(ctx.exception), message)
示例#5
0
 def test_model_invalid_operation(self):
     """Invalid lookups and deletions on a Model must raise TypeError."""
     model1 = Model()
     model2 = Model()
     model1.add("m", model2)
     param = Parameter()
     model1.add("p", param)

     # Unknown name: the exception message is checked as well.
     with self.assertRaises(TypeError) as e:
         model1["notfound"]
     expected_message = (
         "'name' is not a name of neither parameter nor submodel")
     self.assertEqual(str(e.exception), expected_message)

     # Deleting entries is rejected, whether by known name or by integer.
     for key in ("p", "m", 0):
         with self.assertRaises(TypeError):
             del model1[key]

     # Keys made of integers are rejected for lookup.
     for key in ((0, 1), [0, 1]):
         with self.assertRaises(TypeError):
             model1[key]
示例#6
0
    def test_ModelTest_CheckGetSubmodelRecursiveByTuple(self):
        """Submodels are reachable by name or by a tuple of nested names."""
        m = Model()
        sm1 = Model()
        sm2 = Model()
        ssm = Model()
        p = Parameter()
        m.add("p", p)
        m.add("sm1", sm1)
        m.add("sm2", sm2)
        sm1.add("ssm", ssm)

        # Direct and nested lookups return the very same objects.
        self.assertIs(sm1, m["sm1"])
        self.assertIs(sm2, m["sm2"])
        self.assertIs(ssm, m["sm1", "ssm"])
        self.assertIs(ssm, sm1["ssm"])

        # Looking up the registered parameter must not raise.
        m["p"]

        # Names that do not resolve from the looked-up model raise TypeError.
        for missing in ("ssm", ("sm2", "ssm"), "x"):
            with self.assertRaises(TypeError):
                m[missing]
示例#7
0
 def load(name, prefix):
     """Builds an EncoderDecoder from files written under ``prefix + name``.

     ``__init__`` is bypassed; every attribute is restored from its
     ``.param`` file, the LSTMs via ``LSTM.load``, and the dropout rate
     from the first line of the ``.config`` file.
     """
     base = prefix + name
     model = EncoderDecoder.__new__(EncoderDecoder)
     model.name_ = name
     model.psrc_lookup_ = Parameter.load(base + "_src_lookup.param")
     model.ptrg_lookup_ = Parameter.load(base + "_trg_lookup.param")
     model.pwhj_ = Parameter.load(base + "_whj.param")
     model.pbj_ = Parameter.load(base + "_bj.param")
     model.pwjy_ = Parameter.load(base + "_wjy.param")
     model.pby_ = Parameter.load(base + "_by.param")
     model.src_fw_lstm_ = LSTM.load(name + "_src_fw_lstm", prefix)
     model.src_bw_lstm_ = LSTM.load(name + "_src_bw_lstm", prefix)
     model.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
     # The embedding size is recovered from the first dimension of pbj_.
     model.embed_size_ = model.pbj_.shape()[0]
     with open(base + ".config", "r", encoding="utf-8") as config_file:
         first_line = config_file.readline()
     model.dropout_rate_ = float(first_line)
     return model
示例#8
0
def main():
    """Trains a small two-layer network on four 2-D samples for 10 epochs.

    Each epoch prints the current outputs and the loss, then runs one
    SGD update.
    """
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer, with all four parameters registered in creation order.
    optimizer = O.SGD(0.1)
    for param in (pw1, pb1, pw2, pb2):
        optimizer.add_parameter(param)

    # Training data: four samples and their labels.
    input_data = [
        np.array(sample, dtype=np.float32)
        for sample in ([1, 1], [1, -1], [-1, 1], [-1, -1])
    ]
    output_data = [
        np.array(label, dtype=np.float32)
        for label in ([1], [-1], [-1], [1])
    ]
    num_samples = len(input_data)

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains and prints the current outputs.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(num_samples):
            print("  [", j, "]: ", y_val[j])

        # Extends the graph with the squared-error loss.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains and prints the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
示例#9
0
class EncoderDecoder(object):
    """Encoder-decoder model with one source LSTM and one target LSTM."""

    def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
                 hidden_size, dropout_rate):
        """Creates the lookup/output parameters and the two LSTMs."""
        self.name_ = name
        self.dropout_rate_ = dropout_rate
        self.psrc_lookup_ = Parameter([embed_size, src_vocab_size],
                                      I.XavierUniform())
        self.ptrg_lookup_ = Parameter([embed_size, trg_vocab_size],
                                      I.XavierUniform())
        self.pwhy_ = Parameter([trg_vocab_size, hidden_size],
                               I.XavierUniform())
        self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
        self.src_lstm_ = LSTM(name + "_src_lstm", embed_size, hidden_size)
        self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size, hidden_size)

    @staticmethod
    def load(name, prefix):
        """Loads all parameters from files under ``prefix + name``.

        ``__init__`` is bypassed; the dropout rate is read from the first
        line of the ``.config`` file.
        """
        encdec = EncoderDecoder.__new__(EncoderDecoder)
        encdec.name_ = name
        encdec.psrc_lookup_ = Parameter.load(prefix + name +
                                             "_src_lookup.param")
        encdec.ptrg_lookup_ = Parameter.load(prefix + name +
                                             "_trg_lookup.param")
        encdec.pwhy_ = Parameter.load(prefix + name + "_why.param")
        encdec.pby_ = Parameter.load(prefix + name + "_by.param")
        encdec.src_lstm_ = LSTM.load(name + "_src_lstm", prefix)
        encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
        # Explicit encoding so reading does not depend on the locale default.
        with open(prefix + name + ".config", "r", encoding="utf-8") as ifs:
            encdec.dropout_rate_ = float(ifs.readline())
        return encdec

    def save(self, prefix):
        """Saves all parameters and the dropout rate under ``prefix``."""
        self.psrc_lookup_.save(prefix + self.name_ + "_src_lookup.param")
        self.ptrg_lookup_.save(prefix + self.name_ + "_trg_lookup.param")
        self.pwhy_.save(prefix + self.name_ + "_why.param")
        self.pby_.save(prefix + self.name_ + "_by.param")
        self.src_lstm_.save(prefix)
        self.trg_lstm_.save(prefix)
        # Explicit encoding so writing does not depend on the locale default.
        with open(prefix + self.name_ + ".config", "w",
                  encoding="utf-8") as ofs:
            print(self.dropout_rate_, file=ofs)

    def register_training(self, trainer):
        """Adds this model's parameters, and both LSTMs', to the trainer."""
        trainer.add_parameter(self.psrc_lookup_)
        trainer.add_parameter(self.ptrg_lookup_)
        trainer.add_parameter(self.pwhy_)
        trainer.add_parameter(self.pby_)
        self.src_lstm_.register_training(trainer)
        self.trg_lstm_.register_training(trainer)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares the decoder states."""
        # Feeds each element of src_batch through the source LSTM in order.
        src_lookup = F.parameter(self.psrc_lookup_)
        self.src_lstm_.init()
        for it in src_batch:
            x = F.pick(src_lookup, it, 1)
            x = F.dropout(x, self.dropout_rate_, train)
            self.src_lstm_.forward(x)

        # The decoder LSTM starts from the encoder's final cell/hidden state.
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.why_ = F.parameter(self.pwhy_)
        self.by_ = F.parameter(self.pby_)
        self.trg_lstm_.init(self.src_lstm_.get_c(), self.src_lstm_.get_h())

    def decode_step(self, trg_words, train):
        """Runs one decoding step and returns ``why_ @ h + by_``."""
        x = F.pick(self.trg_lookup_, trg_words, 1)
        x = F.dropout(x, self.dropout_rate_, train)
        h = self.trg_lstm_.forward(x)
        h = F.dropout(h, self.dropout_rate_, train)
        return self.why_ @ h + self.by_

    def loss(self, trg_batch, train):
        """Calculates the loss over the given target sentences.

        Element ``i`` of ``trg_batch`` is the decoder input and element
        ``i + 1`` is the target of the softmax cross entropy.
        """
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            losses.append(F.softmax_cross_entropy(y, trg_batch[i + 1], 0))
        return F.batch.mean(F.sum(losses))
示例#10
0
 def __init__(self):
     """Creates a 5-element parameter, presets its gradient, registers it."""
     param = Parameter([5], I.Constant(0))
     self.param = param
     # The gradient is filled with the values 1 through 5.
     param.gradient = tF.raw_input([5], list(range(1, 6)))
     self.scan_attributes()
示例#11
0
 def __init__(self):
     """Creates three empty parameters and registers them all."""
     self.pwxh, self.pwhh, self.pbh = Parameter(), Parameter(), Parameter()
     self.add_all_parameters()
def main():
    """Trains a two-layer perceptron on MNIST, testing after every epoch.

    Per epoch: shuffles the sample IDs, runs all training minibatches with
    an SGD update each, then prints the accuracy over the test set.
    """
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte",
                               NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte",
                               NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Runs on the CPU device (a CUDADevice(0) alternative was left disabled).
    with DefaultScopeDevice(CPUDevice()):

        # Parameters for the multilayer perceptron.
        pw1 = Parameter("w1", [NUM_HIDDEN_UNITS, NUM_INPUT_UNITS],
                        XavierUniform())
        pb1 = Parameter("b1", [NUM_HIDDEN_UNITS], Constant(0))
        pw2 = Parameter("w2", [NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS],
                        XavierUniform())
        pb2 = Parameter("b2", [NUM_OUTPUT_UNITS], Constant(0))

        # NOTE: batch-normalization parameters (beta/gamma) are not used.

        # Trainer, with the four parameters registered in creation order.
        trainer = SGD(.5)
        for param in (pw1, pb1, pw2, pb2):
            trainer.add_parameter(param)

        def make_graph(inputs, train):
            """Builds the predictor network and returns its output node."""
            # Stores input values.
            x = F.input(data=inputs)
            # Hidden layer.
            w1 = F.input(param=pw1)
            b1 = F.input(param=pb1)
            hidden = F.relu(F.matmul(w1, x) + b1)
            # Dropout (batch normalization is not applied here).
            hidden = F.dropout(hidden, .5, train)
            # Output layer.
            w2 = F.input(param=pw2)
            b2 = F.input(param=pb2)
            return F.matmul(w2, hidden) + b2

        ids = list(range(NUM_TRAIN_SAMPLES))

        for epoch in range(MAX_EPOCH):
            # Shuffles sample IDs.
            random.shuffle(ids)

            # Training loop
            for batch in range(NUM_TRAIN_BATCHES):
                print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES),
                      end="")
                begin = batch * BATCH_SIZE
                batch_ids = ids[begin:begin + BATCH_SIZE]
                inputs = train_inputs[batch_ids]
                labels = train_labels[batch_ids]

                trainer.reset_gradients()

                # Constructs the graph.
                g = Graph()
                with DefaultScopeGraph(g):
                    y = make_graph(inputs, True)
                    loss = F.softmax_cross_entropy(y, labels, 0)
                    avg_loss = F.batch.mean(loss)

                    # Forward, backward, and updates parameters.
                    g.forward(avg_loss)
                    g.backward(avg_loss)

                    trainer.update()

            print()

            match = 0

            # Test loop
            for batch in range(NUM_TEST_BATCHES):
                print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES),
                      end="")
                # Makes a test minibatch.
                begin = batch * BATCH_SIZE
                inputs = test_inputs[begin:begin + BATCH_SIZE]

                # Constructs the graph.
                with Graph() as g:
                    y = make_graph(inputs, False)

                    # Gets outputs, argmax, and compares them with the label.
                    y_val = g.forward(y).to_list()
                    for i in range(BATCH_SIZE):
                        # Sample i's scores start at this offset in y_val.
                        offset = i * NUM_OUTPUT_UNITS
                        best_score = -1e10
                        best_label = -1
                        for j in range(NUM_OUTPUT_UNITS):
                            score = y_val[j + offset]
                            if score > best_score:
                                best_score = score
                                best_label = j
                        if best_label == test_labels[i + begin]:
                            match += 1

            accuracy = 100.0 * match / NUM_TEST_SAMPLES
            print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
示例#13
0
 def __init__(self, in_size, out_size):
     """Stores out_size, creates the weight and two biases, registers them."""
     weight_shape = [3 * out_size, in_size]
     self.out_size = out_size
     self.pw = Parameter(weight_shape, I.Uniform(-0.1, 0.1))
     # Both biases have out_size elements and start at zero.
     self.pbf = Parameter([out_size], I.Constant(0))
     self.pbr = Parameter([out_size], I.Constant(0))
     self.scan_attributes()
示例#14
0
 def __init__(self):
     """Creates three empty parameters and registers them by scanning."""
     self._pwxh, self._pwhh, self._pbh = (
         Parameter(), Parameter(), Parameter())
     self.scan_attributes()
示例#15
0
class EncoderDecoder(object):
    """Encoder-decoder with forward/backward source LSTMs and attention."""

    def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
                 hidden_size, dropout_rate):
        """Creates all parameters and the three LSTMs."""
        self.name_ = name
        self.embed_size_ = embed_size
        self.dropout_rate_ = dropout_rate
        self.psrc_lookup_ = Parameter(
            [embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup_ = Parameter(
            [embed_size, trg_vocab_size], I.XavierUniform())
        self.pwhj_ = Parameter(
            [embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj_ = Parameter([embed_size], I.Constant(0))
        self.pwjy_ = Parameter(
            [trg_vocab_size, embed_size], I.XavierUniform())
        self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm_ = LSTM(
            name + "_src_fw_lstm", embed_size, hidden_size)
        self.src_bw_lstm_ = LSTM(
            name + "_src_bw_lstm", embed_size, hidden_size)
        # The decoder LSTM input is twice the embedding size.
        self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)

    @staticmethod
    def load(name, prefix):
        """Loads all parameters from files under ``prefix + name``.

        ``__init__`` is bypassed; the embedding size is recovered from
        ``pbj_`` and the dropout rate from the ``.config`` file.
        """
        base = prefix + name
        model = EncoderDecoder.__new__(EncoderDecoder)
        model.name_ = name
        model.psrc_lookup_ = Parameter.load(base + "_src_lookup.param")
        model.ptrg_lookup_ = Parameter.load(base + "_trg_lookup.param")
        model.pwhj_ = Parameter.load(base + "_whj.param")
        model.pbj_ = Parameter.load(base + "_bj.param")
        model.pwjy_ = Parameter.load(base + "_wjy.param")
        model.pby_ = Parameter.load(base + "_by.param")
        model.src_fw_lstm_ = LSTM.load(name + "_src_fw_lstm", prefix)
        model.src_bw_lstm_ = LSTM.load(name + "_src_bw_lstm", prefix)
        model.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
        model.embed_size_ = model.pbj_.shape()[0]
        with open(base + ".config", "r", encoding="utf-8") as config_file:
            first_line = config_file.readline()
        model.dropout_rate_ = float(first_line)
        return model

    def save(self, prefix):
        """Saves all parameters and the dropout rate under ``prefix``."""
        base = prefix + self.name_
        named_params = (
            ("_src_lookup.param", self.psrc_lookup_),
            ("_trg_lookup.param", self.ptrg_lookup_),
            ("_whj.param", self.pwhj_),
            ("_bj.param", self.pbj_),
            ("_wjy.param", self.pwjy_),
            ("_by.param", self.pby_),
        )
        for suffix, param in named_params:
            param.save(base + suffix)
        for lstm in (self.src_fw_lstm_, self.src_bw_lstm_, self.trg_lstm_):
            lstm.save(prefix)
        with open(base + ".config", "w", encoding="utf-8") as config_file:
            print(self.dropout_rate_, file=config_file)

    def register_training(self, trainer):
        """Adds this model's parameters, and the LSTMs', to the trainer."""
        own_params = (
            self.psrc_lookup_,
            self.ptrg_lookup_,
            self.pwhj_,
            self.pbj_,
            self.pwjy_,
            self.pby_,
        )
        for param in own_params:
            trainer.add_parameter(param)
        for lstm in (self.src_fw_lstm_, self.src_bw_lstm_, self.trg_lstm_):
            lstm.register_training(trainer)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares the decoder states."""
        rate = self.dropout_rate_

        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup_)
        e_list = [
            F.dropout(F.pick(src_lookup, x, 1), rate, train)
            for x in src_batch
        ]

        # Forward encoding
        self.src_fw_lstm_.init()
        f_list = [
            F.dropout(self.src_fw_lstm_.forward(e), rate, train)
            for e in e_list
        ]

        # Backward encoding, then restored to source order.
        self.src_bw_lstm_.init()
        b_list = [
            F.dropout(self.src_bw_lstm_.forward(e), rate, train)
            for e in reversed(e_list)
        ]
        b_list.reverse()

        # Sums the two directions per position, then joins the positions.
        fb_list = [f + b for f, b in zip(f_list, b_list)]
        self.concat_fb_ = F.concat(fb_list, 1)
        self.t_concat_fb_ = F.transpose(self.concat_fb_)

        # Initializes decode states.
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.whj_ = F.parameter(self.pwhj_)
        self.bj_ = F.parameter(self.pbj_)
        self.wjy_ = F.parameter(self.pwjy_)
        self.by_ = F.parameter(self.pby_)
        self.feed_ = F.zeros([self.embed_size_])
        init_c = self.src_fw_lstm_.get_c() + self.src_bw_lstm_.get_c()
        init_h = self.src_fw_lstm_.get_h() + self.src_bw_lstm_.get_h()
        self.trg_lstm_.init(init_c, init_h)

    def decode_step(self, trg_words, train):
        """Runs one decoding step with attention and returns the output."""
        embedding = F.pick(self.trg_lookup_, trg_words, 1)
        embedding = F.dropout(embedding, self.dropout_rate_, train)
        # The previous feed vector is joined to the word embedding.
        hidden = self.trg_lstm_.forward(F.concat([embedding, self.feed_], 0))
        hidden = F.dropout(hidden, self.dropout_rate_, train)
        atten_probs = F.softmax(self.t_concat_fb_ @ hidden, 0)
        context = self.concat_fb_ @ atten_probs
        self.feed_ = F.tanh(
            self.whj_ @ F.concat([hidden, context], 0) + self.bj_)
        return self.wjy_ @ self.feed_ + self.by_

    def loss(self, trg_batch, train):
        """Calculates the loss over the given target sentences.

        Each element is the decoder input for predicting the next element.
        """
        losses = []
        for nxt in range(1, len(trg_batch)):
            y = self.decode_step(trg_batch[nxt - 1], train)
            losses.append(F.softmax_cross_entropy(y, trg_batch[nxt], 0))
        return F.batch.mean(F.sum(losses))
示例#16
0
    def __init__(self, dropout_rate):
        """Stores the dropout rate and creates empty parameters and LSTMs.

        All attributes are then registered with ``scan_attributes()``.
        """
        self.dropout_rate_ = dropout_rate

        parameter_attrs = (
            "psrc_lookup_",
            "ptrg_lookup_",
            "pwfbw_",
            "pwhw_",
            "pwwe_",
            "pwhj_",
            "pbj_",
            "pwjy_",
            "pby_",
        )
        for attr in parameter_attrs:
            setattr(self, attr, Parameter())

        for attr in ("src_fw_lstm_", "src_bw_lstm_", "trg_lstm_"):
            setattr(self, attr, LSTM())

        self.scan_attributes()
示例#17
0
class EncoderDecoder(Model):
    """Encoder-decoder with bidirectional source LSTMs and attention."""

    def __init__(self, dropout_rate):
        """Creates empty parameters and LSTMs and registers them."""
        self.dropout_rate_ = dropout_rate

        # Lookup tables.
        self.psrc_lookup_ = Parameter()
        self.ptrg_lookup_ = Parameter()
        # Parameters used when computing attention in decode_step().
        self.pwfbw_ = Parameter()
        self.pwhw_ = Parameter()
        self.pwwe_ = Parameter()
        # Parameters of the output layers.
        self.pwhj_ = Parameter()
        self.pbj_ = Parameter()
        self.pwjy_ = Parameter()
        self.pby_ = Parameter()
        # Recurrent submodels.
        self.src_fw_lstm_ = LSTM()
        self.src_bw_lstm_ = LSTM()
        self.trg_lstm_ = LSTM()

        self.scan_attributes()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        """Initializes every parameter and LSTM for the given sizes."""
        self.psrc_lookup_.init(
            [embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup_.init(
            [embed_size, trg_vocab_size], I.XavierUniform())
        self.pwfbw_.init([2 * hidden_size, hidden_size], I.XavierUniform())
        self.pwhw_.init([hidden_size, hidden_size], I.XavierUniform())
        self.pwwe_.init([hidden_size], I.XavierUniform())
        self.pwhj_.init([embed_size, hidden_size], I.XavierUniform())
        self.pbj_.init([embed_size], I.Constant(0))
        self.pwjy_.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby_.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm_.init(embed_size, hidden_size)
        self.src_bw_lstm_.init(embed_size, hidden_size)
        self.trg_lstm_.init(embed_size + hidden_size * 2, hidden_size)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares the decoder states."""
        rate = self.dropout_rate_

        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup_)
        e_list = [
            F.dropout(F.pick(src_lookup, x, 1), rate, train)
            for x in src_batch
        ]

        # Forward encoding
        self.src_fw_lstm_.reset()
        f_list = [
            F.dropout(self.src_fw_lstm_.forward(e), rate, train)
            for e in e_list
        ]

        # Backward encoding, then restored to source order.
        self.src_bw_lstm_.reset()
        b_list = [
            F.dropout(self.src_bw_lstm_.forward(e), rate, train)
            for e in reversed(e_list)
        ]
        b_list.reverse()

        # Concatenates RNN states.
        fb_list = [F.concat([f, b], 0) for f, b in zip(f_list, b_list)]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decode states.
        self.wfbw_ = F.parameter(self.pwfbw_)
        self.whw_ = F.parameter(self.pwhw_)
        self.wwe_ = F.parameter(self.pwwe_)
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.whj_ = F.parameter(self.pwhj_)
        self.bj_ = F.parameter(self.pbj_)
        self.wjy_ = F.parameter(self.pwjy_)
        self.by_ = F.parameter(self.pby_)
        self.trg_lstm_.reset()

    def decode_step(self, trg_words, train):
        """Runs one decoding step with attention and returns the output."""
        sentence_len = self.concat_fb.shape()[1]

        # Attention weights from the current decoder hidden state.
        projected = self.whw_ @ self.trg_lstm_.get_h()
        as_row = F.reshape(projected, Shape([1, projected.shape()[0]]))
        tiled = F.broadcast(as_row, 0, sentence_len)
        scores = F.tanh(self.t_concat_fb @ self.wfbw_ + tiled)
        atten_prob = F.softmax(scores @ self.wwe_, 0)
        context = self.concat_fb @ atten_prob

        embedding = F.pick(self.trg_lookup_, trg_words, 1)
        embedding = F.dropout(embedding, self.dropout_rate_, train)

        hidden = self.trg_lstm_.forward(F.concat([embedding, context], 0))
        hidden = F.dropout(hidden, self.dropout_rate_, train)
        joint = F.tanh(self.whj_ @ hidden + self.bj_)
        return self.wjy_ @ joint + self.by_

    def loss(self, trg_batch, train):
        """Calculates the loss over the given target sentences.

        Each element is the decoder input for predicting the next element.
        """
        losses = []
        for nxt in range(1, len(trg_batch)):
            y = self.decode_step(trg_batch[nxt - 1], train)
            losses.append(F.softmax_cross_entropy(y, trg_batch[nxt], 0))
        return F.batch.mean(F.sum(losses))
示例#18
0
 def __init__(self):
     """Creates empty parameters and LSTMs, then registers all of them."""
     self.dropout_rate = DROPOUT_RATE
     # Lookup tables.
     self.psrc_lookup, self.ptrg_lookup = Parameter(), Parameter()
     # Weight/bias pairs.
     self.pwhj, self.pbj = Parameter(), Parameter()
     self.pwjy, self.pby = Parameter(), Parameter()
     # Recurrent submodels.
     self.src_fw_lstm, self.src_bw_lstm, self.trg_lstm = (
         LSTM(), LSTM(), LSTM())
     self.add_all_parameters()
     self.add_all_submodels()
示例#19
0
class AttentionalEncoderDecoder(Model):
    """Encoder-decoder translation model with dot-attention."""

    def __init__(self):
        # Everything is created empty here; init() assigns the shapes.
        self.dropout_rate = DROPOUT_RATE
        self.psrc_lookup = Parameter()
        self.ptrg_lookup = Parameter()
        self.pwhj = Parameter()
        self.pbj = Parameter()
        self.pwjy = Parameter()
        self.pby = Parameter()
        self.src_fw_lstm = LSTM()
        self.src_bw_lstm = LSTM()
        self.trg_lstm = LSTM()
        self.add_all_parameters()
        self.add_all_submodels()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        """Initializes every parameter and LSTM for the given sizes."""
        self.psrc_lookup.init(
            [embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup.init(
            [embed_size, trg_vocab_size], I.XavierUniform())
        self.pwhj.init([embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj.init([embed_size], I.Constant(0))
        self.pwjy.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm.init(embed_size, hidden_size)
        self.src_bw_lstm.init(embed_size, hidden_size)
        self.trg_lstm.init(2 * embed_size, hidden_size)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        rate = self.dropout_rate

        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup)
        e_list = [
            F.dropout(F.pick(src_lookup, x, 1), rate, train)
            for x in src_batch
        ]

        # Forward encoding
        self.src_fw_lstm.restart()
        f_list = [
            F.dropout(self.src_fw_lstm.forward(e), rate, train)
            for e in e_list
        ]

        # Backward encoding, then restored to source order.
        self.src_bw_lstm.restart()
        b_list = [
            F.dropout(self.src_bw_lstm.forward(e), rate, train)
            for e in reversed(e_list)
        ]
        b_list.reverse()

        # Sums the two directions per position, then joins the positions.
        fb_list = [f + b for f, b in zip(f_list, b_list)]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decode states.
        embed_size = self.psrc_lookup.shape()[0]
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.whj = F.parameter(self.pwhj)
        self.bj = F.parameter(self.pbj)
        self.wjy = F.parameter(self.pwjy)
        self.by = F.parameter(self.pby)
        self.feed = F.zeros([embed_size])
        init_c = self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c()
        init_h = self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h()
        self.trg_lstm.restart(init_c, init_h)

    def decode_step(self, trg_words, train):
        """Runs one decoding step and returns the output node."""
        embedding = F.pick(self.trg_lookup, trg_words, 1)
        embedding = F.dropout(embedding, self.dropout_rate, train)
        # The previous feed vector is joined to the word embedding.
        hidden = self.trg_lstm.forward(F.concat([embedding, self.feed], 0))
        hidden = F.dropout(hidden, self.dropout_rate, train)
        atten_probs = F.softmax(self.t_concat_fb @ hidden, 0)
        context = self.concat_fb @ atten_probs
        self.feed = F.tanh(
            self.whj @ F.concat([hidden, context], 0) + self.bj)
        return self.wjy @ self.feed + self.by

    def loss(self, trg_batch, train):
        """Calculates loss values.

        Each element is the decoder input for predicting the next element.
        """
        losses = []
        for nxt in range(1, len(trg_batch)):
            y = self.decode_step(trg_batch[nxt - 1], train)
            losses.append(F.softmax_cross_entropy(y, trg_batch[nxt], 0))
        return F.batch.mean(F.sum(losses))
示例#20
0
def main():
    """Trains and evaluates a two-layer CNN classifier.

    Loads the image/label files under `data/`, creates the parameters on
    CUDA device 0, then for each epoch shuffles the training sample order,
    performs one SGD update per minibatch and prints the accuracy measured
    over the test batches.

    Returns:
        Always 0.
    """
    # Dataset: images and labels for both splits.
    train_images = load_images("data/train-images-idx3-ubyte",
                               NUM_TRAIN_SAMPLES)
    train_targets = load_labels("data/train-labels-idx1-ubyte",
                                NUM_TRAIN_SAMPLES)
    test_images = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_targets = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Default device and default graph used by everything below.
    device = D.CUDA(0)
    Device.set_default(device)
    graph = Graph()
    Graph.set_default(graph)

    # Convolution kernels.
    # Shape: {kernel_height, kernel_width, in_channels, out_channels}
    conv1_shape = Shape([KERNEL_SIZE1, KERNEL_SIZE1, 1, NUM_CHANNELS1])
    pw_conv1 = Parameter(conv1_shape, I.XavierUniformConv2D())
    conv2_shape = Shape(
        [KERNEL_SIZE2, KERNEL_SIZE2, NUM_CHANNELS1, NUM_CHANNELS2])
    pw_conv2 = Parameter(conv2_shape, I.XavierUniformConv2D())

    # Fully connected layers: weights first, then biases.
    pw_dense1 = Parameter(Shape([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS]),
                          I.XavierUniform())
    pw_dense2 = Parameter(Shape([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS]),
                          I.XavierUniform())
    pb_dense1 = Parameter(Shape([NUM_HIDDEN_UNITS]), I.Constant(0))
    pb_dense2 = Parameter(Shape([NUM_OUTPUT_UNITS]), I.Constant(0))

    # Optimizer owning all six parameters.
    optimizer = O.SGD(.1)
    optimizer.add(pw_conv1, pw_conv2, pw_dense1, pw_dense2, pb_dense1,
                  pb_dense2)

    def build_network(images, train):
        """Builds the predictor network for one minibatch of images.

        `train` is forwarded to both `F.dropout` calls.
        """
        # Input and parameters.
        x = F.input(images)
        w_conv1 = F.parameter(pw_conv1)
        w_conv2 = F.parameter(pw_conv2)
        w_dense1 = F.parameter(pw_dense1)
        w_dense2 = F.parameter(pw_dense2)
        b_dense1 = F.parameter(pb_dense1)
        b_dense2 = F.parameter(pb_dense2)
        # Two conv -> ReLU -> max-pool stages.
        conv1 = F.conv2d(x, w_conv1, PADDING1, PADDING1, 1, 1, 1, 1)
        pooled1 = F.max_pool2d(F.relu(conv1), 2, 2, 0, 0, 2, 2)
        conv2 = F.conv2d(pooled1, w_conv2, PADDING2, PADDING2, 1, 1, 1, 1)
        pooled2 = F.max_pool2d(F.relu(conv2), 2, 2, 0, 0, 2, 2)
        # Fully connected head with dropout before and after the hidden layer.
        flat = F.dropout(F.flatten(pooled2), .5, train)
        hidden = F.relu(F.matmul(w_dense1, flat) + b_dense1)
        hidden = F.dropout(hidden, .5, train)
        return F.matmul(w_dense2, hidden) + b_dense2

    # Sample order, reshuffled at the start of every epoch.
    sample_ids = list(range(NUM_TRAIN_SAMPLES))

    for epoch in range(MAX_EPOCH):
        random.shuffle(sample_ids)

        # Training pass.
        for step in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (step + 1, NUM_TRAIN_BATCHES),
                  end="")
            # Gathers the shuffled sample IDs of this minibatch.
            base = step * BATCH_SIZE
            chosen = [sample_ids[base + k] for k in range(BATCH_SIZE)]
            batch_images = [train_images[s] for s in chosen]
            batch_targets = [train_targets[s] for s in chosen]

            # Rebuilds the graph from scratch for this minibatch.
            graph.clear()
            logits = build_network(batch_images, True)
            avg_loss = F.batch.mean(
                F.softmax_cross_entropy(logits, batch_targets, 0))

            # Clears old gradients, backpropagates, applies the update.
            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        correct = 0

        # Evaluation pass.
        for step in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (step + 1, NUM_TEST_BATCHES),
                  end="")
            base = step * BATCH_SIZE
            batch_images = [test_images[base + k] for k in range(BATCH_SIZE)]

            graph.clear()
            scores = build_network(batch_images, False).to_list()

            # Manual argmax per sample; the indexing treats `scores` as
            # NUM_OUTPUT_UNITS consecutive values for each sample.
            for k in range(BATCH_SIZE):
                row = k * NUM_OUTPUT_UNITS
                best_score, best_class = -1e10, -1
                for cls in range(NUM_OUTPUT_UNITS):
                    score = scores[row + cls]
                    if score > best_score:
                        best_score, best_class = score, cls

                if best_class == test_targets[base + k]:
                    correct += 1

        accuracy = 100.0 * correct / NUM_TEST_SAMPLES
        print("epoch %d: accuracy: %.2f%%" % (epoch, accuracy))

    return 0
示例#21
0
 def __init__(self, in_size, out_size, trainer):
     """Creates the weight and bias parameters and registers them.

     Args:
         in_size: Second dimension of the weight shape.
         out_size: First dimension of the weight shape and size of the bias.
         trainer: Object whose `add_parameter` receives both parameters.
     """
     # Weight drawn from Uniform(-0.1, 0.1); bias starts at constant 0.
     self.pw_ = Parameter([out_size, in_size], I.Uniform(-0.1, 0.1))
     self.pb_ = Parameter([out_size], I.Constant(0))
     for param in (self.pw_, self.pb_):
         trainer.add_parameter(param)
示例#22
0
 def setUp(self):
     """Installs a naive default device and an 8-element test parameter.

     The parameter is created with constant 0 and its value is then
     overwritten with the integers 1 through 8.
     """
     self.dev = device = D.Naive()
     Device.set_default(device)
     self.p = param = Parameter([8], I.Constant(0))
     param.value.reset_by_vector(list(range(1, 9)))
示例#23
0
def main():
    """Trains a two-layer classifier split across two CUDA devices.

    The first layer's parameters are created on `D.CUDA(0)` and the second
    layer's on `D.CUDA(1)`; the hidden activations are copied from one
    device to the other inside the network. Each epoch runs SGD over
    shuffled minibatches and then prints the test accuracy.
    """
    # Dataset: images and labels for both splits.
    train_images = load_images(
        "data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_targets = load_labels(
        "data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_images = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_targets = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Two device objects, each managing a different GPU.
    gpu0 = D.CUDA(0)
    gpu1 = D.CUDA(1)

    # First layer lives on GPU 0.
    pw_hidden = Parameter(
        [NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), gpu0)
    pb_hidden = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), gpu0)

    # Second layer lives on GPU 1.
    pw_out = Parameter(
        [NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), gpu1)
    pb_out = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), gpu1)

    trainer = T.SGD(.1)
    for param in (pw_hidden, pb_hidden, pw_out, pb_out):
        trainer.add_parameter(param)

    def build_network(images):
        """Builds the two-device network for one minibatch of images."""
        # Input values are stored explicitly on GPU 0.
        x = F.input(images, device=gpu0)
        w_hidden = F.parameter(pw_hidden)
        b_hidden = F.parameter(pb_hidden)
        w_out = F.parameter(pw_out)
        b_out = F.parameter(pb_out)
        # The hidden layer is calculated and implicitly stored on GPU 0.
        hidden_gpu0 = F.relu(w_hidden @ x + b_hidden)
        # `copy()` transfers the hidden layer to GPU 1.
        hidden_gpu1 = F.copy(hidden_gpu0, gpu1)
        # The output layer is calculated and implicitly stored on GPU 1.
        return w_out @ hidden_gpu1 + b_out

    # Sample order, reshuffled at the start of every epoch.
    sample_ids = list(range(NUM_TRAIN_SAMPLES))

    graph = Graph()
    Graph.set_default(graph)

    for epoch in range(MAX_EPOCH):
        random.shuffle(sample_ids)

        # Training pass.
        for step in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (step + 1, NUM_TRAIN_BATCHES),
                  end="")
            base = step * BATCH_SIZE
            chosen = [sample_ids[base + k] for k in range(BATCH_SIZE)]
            batch_images = [train_images[s] for s in chosen]
            batch_targets = [train_targets[s] for s in chosen]

            # Rebuilds the graph from scratch for this minibatch.
            graph.clear()

            logits = build_network(batch_images)
            avg_loss = F.batch.mean(
                F.softmax_cross_entropy(logits, batch_targets, 0))

            # Clears old gradients, backpropagates, applies the update.
            trainer.reset_gradients()
            avg_loss.backward()
            trainer.update()

        print()

        correct = 0

        # Evaluation pass.
        for step in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (step + 1, NUM_TEST_BATCHES),
                  end="")
            base = step * BATCH_SIZE
            batch_images = [test_images[base + k] for k in range(BATCH_SIZE)]

            graph.clear()

            scores = build_network(batch_images).to_list()
            # Manual argmax per sample; the indexing treats `scores` as
            # NUM_OUTPUT_UNITS consecutive values for each sample.
            for k in range(BATCH_SIZE):
                row = k * NUM_OUTPUT_UNITS
                best_score, best_class = -1e10, -1
                for cls in range(NUM_OUTPUT_UNITS):
                    score = scores[row + cls]
                    if score > best_score:
                        best_score, best_class = score, cls
                if best_class == test_targets[base + k]:
                    correct += 1

        accuracy = 100.0 * correct / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
示例#24
0
    def test_Parameter_argument(self):
        """Checks the argument combinations accepted by `Parameter`.

        Covers a shape alone, a shape with an initializer, a shape with a
        list of values, an ndarray with and without an explicit shape, and
        the rejection of a bare list without a shape.
        """
        values = self.list_data[:12]
        array = self.ndarray_data[0]

        # Shape only, no data.
        param = Parameter(Shape([2, 3]))
        self.assertEqual(param.shape(), Shape([2, 3]))

        # Shape plus an Initializer.
        param = Parameter(Shape([4, 3]), I.Constant(1))
        self.assertEqual(param.shape(), Shape([4, 3]))
        self.assertEqual(param.value.to_list(), [1] * 12)

        # Shape plus list[float].
        param = Parameter(Shape([4, 3]), values)
        self.assertEqual(param.shape(), Shape([4, 3]))
        self.assertEqual(param.value.to_list(), values)

        # ndarray alone: the resulting shape is expected to be [4, 3].
        param = Parameter(init=array)
        self.assertEqual(param.shape(), Shape([4, 3]))
        self.assertEqual(param.value.to_list(), values)

        # ndarray with an explicit, different shape.
        param = Parameter(Shape([2, 6]), init=array)
        self.assertEqual(param.shape(), Shape([2, 6]))
        self.assertEqual(param.value.to_list(), values)

        # list[float] without a shape must raise TypeError.
        with self.assertRaises(TypeError):
            Parameter(init=values)
示例#25
0
 def test_model_load_save(self):
     """Saves a nested model to a temporary file and loads it back.

     A parent model with two parameters and a submodel (also with two
     parameters) is saved, then loaded into a second hierarchy of the same
     layout built from empty parameters. Each loaded value must equal the
     array that was stored.
     """
     def filled(shape, rows):
         # Parameter created with constant 0, then given `rows` as value.
         param = Parameter(shape, I.Constant(0))
         param.value = tF.input(np.array(rows))
         return param

     sub_rows = [
         ("sp1", [[0, 1, 2, 3], [4, 5, 6, 7]]),
         ("sp2", [[9, 8, 7, 6], [5, 4, 3, 2]]),
     ]
     top_rows = [
         ("p1", [[0, 1], [2, 3], [4, 5], [6, 7]]),
         ("p2", [[9, 8], [7, 6], [5, 4], [3, 2]]),
     ]

     # Source hierarchy holding the known values.
     submodel = TestModel()
     sub_params = [(name, filled([2, 4], rows)) for name, rows in sub_rows]
     for name, param in sub_params:
         submodel.add(name, param)
     parentmodel = TestModel()
     top_params = [(name, filled([4, 2], rows)) for name, rows in top_rows]
     for name, param in top_params:
         parentmodel.add(name, param)
     parentmodel.add("sub", submodel)

     # Destination hierarchy: same names, empty parameters.
     submodel_load = TestModel()
     sub_empty = [(name, Parameter()) for name, _ in sub_rows]
     for name, param in sub_empty:
         submodel_load.add(name, param)
     parentmodel_load = TestModel()
     top_empty = [(name, Parameter()) for name, _ in top_rows]
     for name, param in top_empty:
         parentmodel_load.add(name, param)
     parentmodel_load.add("sub", submodel_load)

     # Round trip through a temporary file addressed by name.
     with tempfile.NamedTemporaryFile() as fp:
         parentmodel.save(fp.name)
         parentmodel_load.load(fp.name)

     # Parent parameters use a plain key, submodel ones a ("sub", name) key.
     expectations = list(top_rows)
     expectations += [(("sub", name), rows) for name, rows in sub_rows]
     for key, rows in expectations:
         loaded = parentmodel_load[key].value.to_ndarrays()[0]
         self.assertTrue((loaded == np.array(rows)).all())
示例#26
0
 def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
              hidden_size, dropout_rate):
     """Creates the parameters and the three LSTMs of the model.

     Args:
         name: Stored as `name_` and used as prefix of the LSTM names.
         src_vocab_size: Second dimension of the source lookup table.
         trg_vocab_size: Second dimension of the target lookup table and
             first dimension of the output projection.
         embed_size: First dimension of both lookup tables.
         hidden_size: Size passed to every LSTM.
         dropout_rate: Stored as `dropout_rate_`.
     """
     def xavier(rows, cols):
         # Matrix parameter of shape [rows, cols], Xavier-uniform init.
         return Parameter([rows, cols], I.XavierUniform())

     def zeros(size):
         # Vector parameter of the given size, initialized to constant 0.
         return Parameter([size], I.Constant(0))

     self.name_ = name
     self.embed_size_ = embed_size
     self.dropout_rate_ = dropout_rate

     # Lookup tables.
     self.psrc_lookup_ = xavier(embed_size, src_vocab_size)
     self.ptrg_lookup_ = xavier(embed_size, trg_vocab_size)

     # Projection of a 2 * hidden_size vector down to embed_size.
     self.pwhj_ = xavier(embed_size, 2 * hidden_size)
     self.pbj_ = zeros(embed_size)

     # Projection from embed_size to the target vocabulary.
     self.pwjy_ = xavier(trg_vocab_size, embed_size)
     self.pby_ = zeros(trg_vocab_size)

     # Two source-side LSTMs and the target LSTM; the latter takes an
     # input of twice the embedding size.
     self.src_fw_lstm_ = LSTM(name + "_src_fw_lstm", embed_size, hidden_size)
     self.src_bw_lstm_ = LSTM(name + "_src_bw_lstm", embed_size, hidden_size)
     self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)
示例#27
0
 def __init__(self, name, in_size, out_size):
     """Stores the name and output size and creates the three parameters.

     Args:
         name: Stored as `name_`.
         in_size: Second dimension of `pwxh_`.
         out_size: Stored as `out_size_`; every parameter has
             `4 * out_size` rows (presumably one slice per LSTM gate;
             confirm against the forward pass).
     """
     self.name_ = name
     self.out_size_ = out_size
     stacked_rows = 4 * out_size
     self.pwxh_ = Parameter([stacked_rows, in_size], I.XavierUniform())
     self.pwhh_ = Parameter([stacked_rows, out_size], I.XavierUniform())
     self.pbh_ = Parameter([stacked_rows], I.Constant(0))
示例#28
0
 def __init__(self, in_size, out_size):
     """Creates one weight matrix and two bias vectors, then registers them.

     Args:
         in_size: Second dimension of the weight `pw`.
         out_size: Stored as `out_size`; `pw` has `3 * out_size` rows and
             each bias has `out_size` elements.
     """
     def zero_bias():
         # Bias vector of `out_size` elements initialized to constant 0.
         return Parameter([out_size], I.Constant(0))

     self.out_size = out_size
     self.pw = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
     self.pbf = zero_bias()
     self.pbr = zero_bias()
     # Called last, after every parameter attribute has been assigned.
     self.add_all_parameters()
示例#29
0
 def test_model_parameter(self):
     """A parameter added as "p" is returned for both key forms.

     The model is indexed once with the plain string and once with a
     one-element tuple; both must yield the very same object.
     """
     model = Model()
     param = Parameter()
     model.add("p", param)
     for key in ("p", ("p", )):
         self.assertIs(model[key], param)
示例#30
0
def main():
    """Trains and evaluates a two-layer perceptron classifier.

    Loads the image/label files under `data/`, creates the parameters on the
    default device, then for each epoch shuffles the training sample order,
    performs one SGD update per minibatch and prints the accuracy measured
    over the test batches.
    """
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # The default device must be set before the parameters below are created
    # without an explicit device argument.
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Hidden layer (pw1, pb1) and output layer (pw2, pb2): Xavier-uniform
    # weights, biases starting at constant 0.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    # SGD constructed with .5 (presumably the learning rate; confirm against
    # the optimizer API) and given all four parameters.
    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        """Builds `w2 @ dropout(relu(w1 @ x + b1)) + b2` for `inputs`.

        `train` is forwarded to `F.dropout`.
        """
        x = F.input(inputs)

        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)

        h = F.dropout(h, .5, train)

        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    # Sample order, reshuffled at the start of every epoch.
    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            # Minibatch gathered through the shuffled sample IDs.
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            # The graph is rebuilt from scratch for every minibatch.
            g.clear()

            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            # Clears old gradients, backpropagates, applies the update.
            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        # Number of correctly classified test samples in this epoch.
        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            # Test samples are taken in file order, without shuffling.
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, False)
            y_val = y.to_list()
            # Manual argmax per sample; the indexing treats `y_val` as
            # NUM_OUTPUT_UNITS consecutive scores for each sample.
            for i in range(BATCH_SIZE):
                # -1e10 / -1 act as "nothing seen yet" sentinels.
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))