Example #1
    def _train(self, corpus_indices, num_steps, hyper_params, epochs,
               is_random_iter):
        """ train function """
        if is_random_iter:
            data_iter_fn = data_iter_random
        else:
            data_iter_fn = data_iter_consecutive

        batch_size = hyper_params.get("batch_size", 32)
        clipping_theta = hyper_params.get("clipping_theta", 1e-2)
        lr = hyper_params.get("lr", 1e2)

        history_loss = []
        for epoch in range(epochs):
            total_loss, total_num, start = 0.0, 0, time.time()

            state = None
            if not is_random_iter:
                state = self.begin_state(batch_size)

            data_iter = data_iter_fn(corpus_indices,
                                     batch_size,
                                     num_steps,
                                     ctx=self.ctx)
            for x, y in data_iter:
                if is_random_iter:
                    state = self.begin_state(batch_size)
                else:
                    # detach() returns a new NDArray, so reassign the state to
                    # actually cut it off from the previous batch's graph
                    state = [s.detach() for s in state]

                with autograd.record():
                    # x: (batch_size, num_steps) -> (num_steps, batch_size, vocab_size)
                    inputs = nd.one_hot(x.T, self.vocab_size)
                    y_hat, state = self.forward(inputs, state)
                    y = y.T.reshape((-1, ))
                    batch_loss = self.loss(y_hat, y).mean()
                batch_loss.backward()

                if not self.parameters:
                    self.parameters = [
                        p.data() for p in self.collect_params().values()
                    ]
                grad_clipping(self.parameters, clipping_theta, self.ctx)

                if self.trainer:
                    # the loss was averaged over the batch, so step size 1
                    self.trainer.step(1)
                else:
                    sgd(self.parameters, lr, 1)

                total_num += y.size
                total_loss += batch_loss.asscalar() * y.size

            history_loss.append(total_loss / total_num)
            if (epoch + 1) % 50 == 0:
                print("epoch {}, perplexity {}, time {} sec".format(
                    epoch + 1, math.exp(total_loss / total_num),
                    time.time() - start))
                print(self.predict_rnn("分开", 50))
                print(self.predict_rnn("不分开", 50))
        return history_loss
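
The method relies on two helpers that are not shown above. A minimal sketch of both, assuming the common d2l-style MXNet implementations (global-norm gradient clipping, and plain minibatch SGD via in-place NDArray updates):

def grad_clipping(params, theta, ctx):
    # Rescale all gradients so that their global L2 norm is at most theta.
    norm = nd.array([0.0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm


def sgd(params, lr, batch_size):
    # Plain SGD step; the gradient is divided by the batch size because the
    # caller may pass an unaveraged loss (here the caller passes 1).
    for param in params:
        param[:] = param - lr * param.grad / batch_size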
Example #2
import gzip
from time import time

import numpy as np

# one_hot_encoding, neural_network, fit, sgd and validate_accuracy are
# assumed to come from this project's own modules.


def main():
    start_time = time()
    print("---------- main1 --------------")
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    X_train = np.frombuffer(f0.read(), dtype=np.uint8,
                            offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8,
                           offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)

    y_train = one_hot_encoding(y_train)
    y_test_enc = one_hot_encoding(y_test)  # keep the raw y_test for accuracy
    # standardize both splits with the training set's statistics
    mean, std = np.mean(X_train), np.std(X_train)
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    model = neural_network((89, 'TanH'), (10, 'Sigmoid'),
                           input_nodes=784,
                           seed=20190119)
    model = fit(x_train=X_train,
                y_train=y_train,
                x_test=X_test,
                y_test=y_test_enc,
                model=model,
                optimizer=sgd(epochs=50,
                              eta=0.35,
                              etaN=0.15,
                              decay_type='exponential'),
                batch_size=60,
                eval_every=5,
                early_stop=True,
                seed=20190119)

    validate_accuracy(x_test=X_test, y_test=y_test, model=model)

    print("--- %s seconds ---" % (time() - start_time))
Example #3
import gzip
from time import time

import numpy as np

# one_hot_encoding, neural_network, fit, sgd and validate_accuracy are
# assumed to come from this project's own modules.


def main():
    start_time = time()
    print("---------- main5 --------------")
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    X_train = np.frombuffer(f0.read(), dtype=np.uint8,
                            offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8,
                           offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)

    y_train = one_hot_encoding(y_train)
    y_test_enc = one_hot_encoding(y_test)  # keep the raw y_test for accuracy
    # standardize both splits with the training set's statistics
    mean, std = np.mean(X_train), np.std(X_train)
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std

    model = neural_network((89, 'TanH'), (10, 'Softmax'),
                           input_nodes=784,
                           seed=20190119,
                           weight_init='scaled')
    model = fit(x_train=X_train,
                y_train=y_train,
                x_test=X_test,
                y_test=y_test_enc,
                model=model,
                optimizer=sgd(epochs=50,
                              eta=0.15,
                              etaN=0.05,
                              decay_type='exponential',
                              beta=0.85),
                batch_size=60,
                eval_every=5,
                early_stop=True,
                loss_function='cross-entropy',
                seed=20190119,
                dropout=0.8)

    validate_accuracy(x_test=X_test, y_test=y_test, model=model)

    print("--- %s seconds ---" % (time() - start_time))
Example #4
# conv1, sig, pool1, conv2, relu, pool2, flat, f and num_classes are defined
# earlier in the script; only the tail of the pipeline is shown here.

# Fully connected layer with 50 neurons, fed the flattened pool2 output
fc1 = layers.FullyConnected(np.prod(pool2.out_dim), 50)
# tanh activation for the 50-neuron fully connected layer
tanh = mf.TanH()

# Output layer: fully connected with num_classes (here 10) neurons
out = layers.FullyConnected(50, num_classes)

cnn = layers.CNN([conv1, sig, pool1, conv2, relu, pool2, flat, fc1, tanh, out])

mf.model_summary(cnn, 'cnn_model_plot.png', f)

e_nnet, e_accuracy, e_validate, e_loss, e_loss_val = mf.sgd(cnn,
                                                            x_train,
                                                            y_train,
                                                            f,
                                                            minibatch_size=200,
                                                            epoch=20,
                                                            learning_rate=0.01)

best_net = mf.plot_history(e_loss, e_accuracy, e_validate, e_loss_val)
# predict the test set in chunks of 1000 rows to keep memory bounded
mb = mf.batchdata(x_test, 1000)
best_model = e_nnet[best_net[0]]
pred = [best_model.predict(batch) for batch in mb]
pv = np.concatenate(pred, axis=0)


print('Test Set Accuracy with best model parameters: {}'.format(
    mf.accuracy(y_test, pv)))
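
mf.batchdata is not shown either; a minimal sketch, assuming it simply slices the input into consecutive chunks:

def batchdata(data, batch_size):
    # Split `data` into consecutive chunks of at most batch_size rows.
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]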