def _train(self, corpus_indices, num_steps, hyper_params, epochs, is_random_iter):
    """Train the RNN using either random or consecutive sampling of the corpus."""
    if is_random_iter:
        data_iter_fn = data_iter_random
    else:
        data_iter_fn = data_iter_consecutive
    batch_size = hyper_params.get("batch_size", 32)
    clipping_theta = hyper_params.get("clipping_theta", 1e-2)
    lr = hyper_params.get("lr", 1e2)
    history_loss = []
    for epoch in range(epochs):
        total_loss, total_num, start = 0.0, 0, time.time()
        state = None
        if not is_random_iter:
            # With consecutive sampling the state is carried across batches,
            # so it is initialised once per epoch.
            state = self.begin_state(batch_size)
        data_iter = data_iter_fn(corpus_indices, batch_size, num_steps, ctx=self.ctx)
        for x, y in data_iter:
            if is_random_iter:
                # With random sampling adjacent batches are not contiguous,
                # so the state is re-initialised for every batch.
                state = self.begin_state(batch_size)
            else:
                # Detach the carried-over state from the previous graph;
                # detach() returns a new NDArray, so the result must be kept.
                state = [s.detach() for s in state]
            with autograd.record():
                inputs = nd.one_hot(x.T, self.vocab_size)
                y_hat, state = self.forward(inputs, state)
                y = y.T.reshape((-1,))
                batch_loss = self.loss(y_hat, y).mean()
            batch_loss.backward()
            if not self.parameters:
                self.parameters = [
                    p.data() for p in self.collect_params().values()
                ]
            grad_clipping(self.parameters, clipping_theta, self.ctx)
            if self.trainer:
                self.trainer.step(1)  # the loss is already a mean, so step size 1
            else:
                sgd(self.parameters, lr, 1)
            total_num += y.size
            total_loss += batch_loss.asscalar() * y.size
        history_loss.append(total_loss / total_num)
        if (epoch + 1) % 50 == 0:
            print("epoch {}, perplexity {}, time {} sec".format(
                epoch + 1, math.exp(total_loss / total_num), time.time() - start))
            print(self.predict_rnn("分开", 50))
            print(self.predict_rnn("不分开", 50))
    return history_loss
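# The training loop above calls grad_clipping and sgd helpers that are not
# defined in this section. The sketch below shows what they are assumed to do:
# clip the global gradient norm to clipping_theta, then apply a plain in-place
# SGD update. Names and signatures follow the calls above, but the bodies are
# an assumption, not the project's actual implementation.
from mxnet import nd

def grad_clipping(params, theta, ctx):
    """Rescale all gradients so that their global L2 norm is at most theta."""
    norm = nd.array([0.0], ctx=ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm

def sgd(params, lr, batch_size):
    """In-place mini-batch SGD update; batch_size is 1 above because the loss is a mean."""
    for param in params:
        param[:] = param - lr * param.grad / batch_size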
def main(): start_time = time() print("---------- main1 --------------") f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r') f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r') l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r') l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r') X_train = np.frombuffer(f0.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28) X_test = np.frombuffer(f1.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28) y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8) y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8) y_train = one_hot_encoding(y_train) y_label = one_hot_encoding(y_test) mean = np.mean(X_train) std = np.std(X_train) X_train, X_test = X_train - mean, X_test - mean X_train, X_test = X_train / std, X_test / std model = neural_network((89, 'TanH'), (10, 'Sigmoid'), input_nodes=784, seed=20190119) model = fit(x_train=X_train, y_train=y_train, x_test=X_test, y_test=y_label, model=model, optimizer=sgd(epochs=50, eta=0.35, etaN=0.15, decay_type='exponential'), batch_size=60, eval_every=5, early_stop=True, seed=20190119) validate_accuracy(x_test=X_test, y_test=y_test, model=model) # print(model[0][0][0].shape) # print(np.sum(model[0][0][0])) # print(model[0][0][1].shape) # print(np.sum(model[0][0][1])) # # print(model[1][0][0].shape) # print(np.sum(model[1][0][0])) # print(model[1][0][1].shape) # print(np.sum(model[1][0][1])) # print() print("--- %s seconds ---" % (time() - start_time))
def main(): start_time = time() print("---------- main5 --------------") f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r') f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r') l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r') l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r') X_train = np.frombuffer(f0.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28) X_test = np.frombuffer(f1.read(), dtype=np.uint8, offset=16).reshape(-1, 28 * 28) y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8) y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8) y_train = one_hot_encoding(y_train) y_label = one_hot_encoding(y_test) mean = np.mean(X_train) std = np.std(X_train) X_train, X_test = X_train - mean, X_test - mean X_train, X_test = X_train / std, X_test / std model = neural_network((89, 'TanH'), (10, 'Softmax'), input_nodes=784, seed=20190119, weight_init='scaled') model = fit(x_train=X_train, y_train=y_train, x_test=X_test, y_test=y_label, model=model, optimizer=sgd(epochs=50, eta=0.15, etaN=0.05, decay_type='exponential', beta=0.85), batch_size=60, eval_every=5, early_stop=True, loss_function='cross-entropy', seed=20190119, dropout=0.8) validate_accuracy(x_test=X_test, y_test=y_test, model=model) print("--- %s seconds ---" % (time() - start_time))
# Fully connected layer with 50 neurons on the flattened pooling output
fc1 = layers.FullyConnected(np.prod(pool2.out_dim), 50)
# Activation for the fully connected layer of 50 neurons is tanh
tanh = mf.TanH()
# Output layer: fully connected with num_classes neurons
out = layers.FullyConnected(50, num_classes)

cnn = layers.CNN([conv1, sig, pool1, conv2, relu, pool2, flat, fc1, tanh, out])
mf.model_summary(cnn, 'cnn_model_plot.png', f)

e_nnet, e_accuracy, e_validate, e_loss, e_loss_val = mf.sgd(
    cnn, x_train, y_train, f,
    minibatch_size=200, epoch=20, learning_rate=0.01)
best_net = mf.plot_history(e_loss, e_accuracy, e_validate, e_loss_val)

# Run the test set through the best model in chunks of 1000 images to limit memory use.
mb = mf.batchdata(x_test, 1000)
pred = []
for j in range(len(mb)):
    pred.append(e_nnet[best_net[0]].predict(mb[j]))
pv = np.concatenate(pred, axis=0)

print('Test Set Accuracy with best model parameters: {}'.format(
    mf.accuracy(y_test, pv)))
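# mf.batchdata is not shown in this section; the prediction loop above assumes
# it splits the test set into consecutive row chunks of at most the given size.
# A minimal sketch of that assumption:
import numpy as np

def batchdata(x, batch_size):
    """Split x into a list of consecutive chunks with at most batch_size rows each."""
    return [x[i:i + batch_size] for i in range(0, x.shape[0], batch_size)]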