from mxnet import init, gluon from mxnet.gluon import loss as gloss, nn import utils batch_size = 256 num_inputs = 784 num_outputs = 10 num_hiddens = 256 num_epochs = 100 dropout_prob1 = 0.2 dropout_prob2 = 0.5 learning_rate = 0.1 train_iter, test_iter = utils.load_fashion_mnist(batch_size) net = nn.Sequential() net.add(nn.Dense(num_hiddens, activation='relu')) net.add(nn.Dropout(dropout_prob1)) net.add(nn.Dense(num_hiddens, activation='relu')) net.add(nn.Dropout(dropout_prob2)) net.add(nn.Dense(num_outputs)) net.initialize(init.Normal(sigma=0.01)) loss = gloss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate}) utils.train_mnist(net=net, train_iter=train_iter, test_iter=test_iter, loss=loss, num_epochs=num_epochs, batch_size=batch_size, trainer=trainer)
# NOTE(review): this section used `nd`/`autograd` and the weights W1..b3/params
# without any visible import or definition earlier in the file (L4's
# `from mxnet import nd` runs only later).  Imports and parameter
# initialization are added here so the scratch MLP actually runs —
# TODO confirm they are not defined in an unseen part of the file.
from mxnet import autograd, nd

# Model parameters for the two-hidden-layer MLP (sigma=0.01 matches the
# gluon sections' init.Normal(sigma=0.01)).
W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
b1 = nd.zeros(num_hiddens)
W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_hiddens))
b2 = nd.zeros(num_hiddens)
W3 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))
b3 = nd.zeros(num_outputs)
params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()


def dropout(X, dropout_prob):
    """Apply inverted dropout to *X*.

    Each element is zeroed with probability ``dropout_prob``; survivors are
    scaled by ``1 / (1 - dropout_prob)`` so the expected activation is
    unchanged.

    Raises:
        ValueError: if ``dropout_prob`` is outside [0, 1].
    """
    # Explicit validation instead of `assert`: asserts are stripped
    # under `python -O`.
    if not 0 <= dropout_prob <= 1:
        raise ValueError('dropout_prob must be in [0, 1]')
    keep_prob = 1 - dropout_prob
    if keep_prob == 0:
        # Everything is dropped; avoid dividing by zero below.
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob


def net(X):
    """Forward pass of the MLP; dropout is applied only during training."""
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, dropout_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, dropout_prob2)
    # No softmax here: SoftmaxCrossEntropyLoss consumes raw scores.
    return nd.dot(H2, W3) + b3


loss = gloss.SoftmaxCrossEntropyLoss()
utils.train_mnist(net=net, train_iter=train_iter, test_iter=test_iter,
                  loss=loss, num_epochs=num_epochs, batch_size=batch_size,
                  params=params, learning_rate=learning_rate)
import utils from mxnet import gluon, init from mxnet.gluon import data as gdata, nn, loss as gloss num_inputs = 784 num_outputs = 10 batch_size = 256 num_workers = 4 num_epochs = 10 learning_rate = 0.1 # Load data mnist_train = gdata.vision.FashionMNIST(train=True) mnist_test = gdata.vision.FashionMNIST(train=False) transformer = gdata.vision.transforms.ToTensor() train_iter = gdata.DataLoader(mnist_train.transform_first(transformer), batch_size, shuffle=True, num_workers=num_workers) test_iter = gdata.DataLoader(mnist_test.transform_first(transformer), batch_size, shuffle=False, num_workers=num_workers) # Construct neural network model net = nn.Sequential() net.add(nn.Dense(num_outputs)) net.initialize(init.Normal(sigma=0.01)) loss = gloss.SoftmaxCrossEntropyLoss() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate}) utils.train_mnist(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
# NOTE(review): this section calls utils.softmax/utils.train_mnist but did not
# import `utils` itself — it only worked because an earlier section imported
# it.  The import is added here so the section is self-contained.
import utils
from mxnet import nd
from mxnet.gluon import data as gdata

# Hyper-parameters for softmax regression implemented from scratch.
num_inputs = 784
num_outputs = 10
batch_size = 256
num_workers = 4
num_epochs = 100
learning_rate = 0.1

# Model parameters.
# NOTE(review): scale=1.0 is much larger than the sigma=0.01 used by every
# other section in this file — confirm this is intentional.
w = nd.random.normal(scale=1.0, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
w.attach_grad()
b.attach_grad()


def net(X):
    """Softmax regression: flatten the image, apply w/b, then softmax."""
    return utils.softmax(nd.dot(X.reshape((-1, num_inputs)), w) + b)


# Mini-batch iterators over Fashion-MNIST.
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)
transformer = gdata.vision.transforms.ToTensor()
train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),
                              batch_size, shuffle=True,
                              num_workers=num_workers)
test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),
                             batch_size, shuffle=False,
                             num_workers=num_workers)

utils.train_mnist(net, train_iter, test_iter, utils.cross_entropy,
                  num_epochs, batch_size, [w, b], learning_rate)