# coding: utf-8
# Dropout regularization on Fashion-MNIST with the Gluon API.
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

drop_prob1 = 0.2
drop_prob2 = 0.5

net = nn.Sequential()
net.add(nn.Flatten())
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob1))  # dropout after the first hidden layer
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dropout(drop_prob2))  # dropout after the second hidden layer
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

num_epochs = 40
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
             None, trainer)
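# For intuition: nn.Dropout implements inverted dropout. During training each
# activation is zeroed with probability drop_prob and the survivors are scaled
# by 1 / (1 - drop_prob), so the layer is a no-op at test time. A minimal
# sketch of that computation (the helper name `dropout` is an illustration,
# not a gluonbook function):
from mxnet import nd

def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()  # everything is dropped
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    # Scale the survivors so the expected output matches the input.
    return mask * X / keep_prob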
# coding: utf-8
# A from-scratch version of gluonbook's train_ch3 training loop. Assumes
# net, train_iter, test_iter, loss, batch_size and trainer are defined as in
# the previous script.
import gluonbook as gb
from mxnet import autograd


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)  # manual SGD on raw parameters
            else:
                trainer.step(batch_size)        # Gluon Trainer update
            train_l_sum += l.mean().asscalar()
            train_acc_sum += gb.accuracy(y_hat, y)
        print('epoch %d, loss %.4f, train acc %.3f'
              % (epoch + 1, train_l_sum / len(train_iter),
                 train_acc_sum / len(train_iter)))


num_epochs = 500
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
          None, trainer)
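# The loop above leans on gluonbook's accuracy helper. For reference, a sketch
# of what it computes: the fraction of rows whose highest-scoring class equals
# the label.
def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()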
# coding: utf-8
# LeNet on Fashion-MNIST.
import gluonbook as gb
import mxnet as mx
from mxnet import nd, autograd, gluon, init
from mxnet.gluon import loss as gloss, nn
from time import time

net = nn.Sequential()
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))

# Uncomment to inspect the output shape of every layer:
# X = nd.random.uniform(shape=(1, 1, 28, 28))
# net.initialize()
# for layer in net:
#     X = layer(X)
#     print(layer.name, 'output shape: ', X.shape)

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size=batch_size)
print(len(train_iter), len(test_iter))


def evaluate_accuracy(data_iter, net):
    acc = nd.array([0])
    for X, y in data_iter:
        acc += gb.accuracy(net(X), y)
    return acc.asscalar() / len(data_iter)
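# The script stops short of actually training. A sketch of the missing driver,
# reusing evaluate_accuracy above; the initializer, learning rate and epoch
# count are assumptions, not taken from the original:
net.initialize(init=init.Xavier())
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.9})

num_epochs = 5  # assumed; tune as needed
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, start = 0, 0, time()
    for X, y in train_iter:
        with autograd.record():
            y_hat = net(X)
            l = loss(y_hat, y)
        l.backward()
        trainer.step(batch_size)
        train_l_sum += l.mean().asscalar()
        train_acc_sum += gb.accuracy(y_hat, y)
    test_acc = evaluate_accuracy(test_iter, net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch + 1, train_l_sum / len(train_iter),
             train_acc_sum / len(train_iter), test_acc, time() - start))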
# coding: utf-8
# Multilayer perceptron from scratch on Fashion-MNIST.
import gluonbook as gb
from mxnet import nd
from mxnet.gluon import loss as gloss

batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)

num_input = 784  # 28 * 28 Fashion-MNIST images, flattened
num_output = 10
num_hidden = 1024

W1 = nd.random.normal(scale=0.01, shape=(num_input, num_hidden))
b1 = nd.zeros(num_hidden)
W2 = nd.random.normal(scale=0.01, shape=(num_hidden, num_output))
b2 = nd.zeros(num_output)
params = [W1, b1, W2, b2]
for param in params:
    param.attach_grad()  # allocate gradient buffers for autograd


def relu(X):
    return nd.maximum(X, 0)


def net(X):
    X = X.reshape((-1, num_input))
    H = relu(nd.dot(X, W1) + b1)
    return nd.dot(H, W2) + b2


loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 5
lr = 0.5
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
             params, lr)
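# Because params and lr are passed instead of a Trainer, train_ch3 falls back
# to gluonbook's minibatch SGD helper. A sketch of that update (the gradient
# of the summed loss is divided by the batch size to get an average step):
def sgd(params, lr, batch_size):
    for param in params:
        param[:] = param - lr * param.grad / batch_size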