def train(num_gpus, batch_size, lr):
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(batch_size)
        nd.waitall()
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f' %
              (epoch + 1, train_time, test_acc))
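
The snippet above assumes `net`, the d2l helpers and the usual MXNet imports exist at module level. A minimal sketch of that setup, purely as an illustration (the original network is not shown here, so a small LeNet-style model stands in), plus one possible invocation:

import time
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, utils as gutils

net = nn.Sequential()
net.add(nn.Conv2D(6, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(16, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Dense(120, activation='relu'),
        nn.Dense(84, activation='relu'),
        nn.Dense(10))

# Example call: two GPUs, the usual Fashion-MNIST batch size, SGD at 0.1
# train(num_gpus=2, batch_size=256, lr=0.1)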
Example #2
def lenet():
    """

    :return:
    """

    net = nn.Sequential()
    net.add(
        nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Dense by default transforms an input of shape (batch size, channels,
        # height, width) into one of shape (batch size, channels * height * width)
        nn.Dense(120, activation='sigmoid'),
        nn.Dense(84, activation='sigmoid'),
        nn.Dense(10))
    X = nd.random.uniform(shape=(1, 1, 28, 28))
    net.initialize()
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)

    lr, num_epochs, batch_size = 0.9, 5, 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

    net.initialize(force_reinit=True, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})

    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, mx.cpu(),
                  num_epochs)
Example #3
def basic_multilayer():
    """
    Basic multilayer perceptron implemented from scratch
    :return:
    """

    def relu(X):
        return nd.maximum(X, 0)

    def net(X):
        X = X.reshape((-1, num_inputs))
        H = relu(nd.dot(X, W1) + b1)
        return nd.dot(H, W2) + b2

    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    num_inputs, num_outputs, num_hiddens = 784, 10, 256

    W1 = nd.random.normal(scale=0.01, shape=(num_inputs, num_hiddens))
    b1 = nd.zeros(num_hiddens)
    W2 = nd.random.normal(scale=0.01, shape=(num_hiddens, num_outputs))
    b2 = nd.zeros(num_outputs)
    params = [W1, b1, W2, b2]

    for param in params:
        param.attach_grad()

    loss = gloss.SoftmaxCrossEntropyLoss()

    num_epochs, lr = 5, 0.5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
Example #4
def main():
    batch_size = 256
    # Download and load the dataset
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    num_inputs = 784
    num_outputs = 10

    W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
    b = nd.zeros(num_outputs)

    # Attach gradients to allocate gradient buffers
    W.attach_grad()
    b.attach_grad()

    # Number of epochs and learning rate
    num_epochs, lr = 5, 0.1
    train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
              batch_size, num_inputs, W, b, [W, b], lr)
    for X, y in test_iter:
        print(X, y)
        break
    true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
    pred_labels = d2l.get_fashion_mnist_labels(
        net(X, num_inputs, W, b).argmax(axis=1).asnumpy())
    titles = [
        true + '\n' + pred for true, pred in zip(true_labels, pred_labels)
    ]
    show_fashion_mnist(X[0:9], titles[0:9])
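
The main() above calls a net, cross_entropy, train_ch3 and show_fashion_mnist defined elsewhere in its module. As a hedged sketch only (these follow the standard from-scratch softmax regression and are assumptions, not the original helpers), the model and loss consistent with the call signatures used above could look like:

def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # broadcasting divides each row by its sum

def net(X, num_inputs, W, b):
    # Flatten each image to a row vector, apply the linear layer, then softmax
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)

def cross_entropy(y_hat, y):
    # Pick the predicted probability of the true class and take -log
    return -nd.pick(y_hat, y).log()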
Example #5
def densenet():
    """
    The main building blocks of DenseNet are dense blocks and transition layers. The former defines
    how inputs and outputs are concatenated, while the latter controls the number of channels so
    that it does not grow too large.
    :return:
    """
    class DenseBlock(nn.Block):
        def __init__(self, num_convs, num_channels, **kwargs):
            super(DenseBlock, self).__init__(**kwargs)
            self.net = nn.Sequential()
            for _ in range(num_convs):
                self.net.add(conv_block(num_channels))

        def forward(self, X):
            for blk in self.net:
                Y = blk(X)
                X = nd.concat(X, Y, dim=1)  # concatenate the input and output on the channel dimension
            return X

    def conv_block(num_channels):
        blk = nn.Sequential()
        blk.add(nn.BatchNorm(), nn.Activation('relu'),
                nn.Conv2D(num_channels, kernel_size=3, padding=1))
        return blk

    def transition_block(num_channels):
        blk = nn.Sequential()
        blk.add(nn.BatchNorm(), nn.Activation('relu'),
                nn.Conv2D(num_channels, kernel_size=1),
                nn.AvgPool2D(pool_size=2, strides=2))
        return blk

    net = nn.Sequential()
    net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3), nn.BatchNorm(),
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    num_channels, growth_rate = 64, 32  # num_channels is the current number of channels
    num_convs_in_dense_blocks = [4, 4, 4, 4]

    for i, num_convs in enumerate(num_convs_in_dense_blocks):
        net.add(DenseBlock(num_convs, growth_rate))
        # Number of output channels of the previous dense block
        num_channels += num_convs * growth_rate
        # Between dense blocks, add a transition layer that halves the number of channels
        if i != len(num_convs_in_dense_blocks) - 1:
            num_channels //= 2
            net.add(transition_block(num_channels))

    net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(),
            nn.Dense(10))

    lr, num_epochs, batch_size, ctx = 0.1, 5, 256, d2l.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #6
    def generate(self):
        net = nn.Sequential()
        net.add(nn.Dense(256, activation='relu'), nn.Dense(10))
        net.initialize(init.Normal(sigma=0.01))
        batch_size = 256
        train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

        loss = gloss.SoftmaxCrossEntropyLoss()
        trainer = gluon.Trainer(net.collect_params(), 'sgd',
                                {'learning_rate': 0.5})
        num_epochs = 5
        d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                      None, None, trainer)
Example #7
def method1():
    num_epochs, lr, batch_size = 5, 0.5, 256
    loss = gloss.SoftmaxCrossEntropyLoss()
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    net = nn.Sequential()
    net.add(nn.Dense(num_hiddens1, activation="relu"),
            nn.Dropout(drop_prob1),
            nn.Dense(num_hiddens2, activation="relu"),
            nn.Dropout(drop_prob2),
            nn.Dense(num_outputs))
    net.initialize(init.Normal(sigma=0.01))
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None,
                  None, trainer)
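
method1() refers to hidden-layer sizes and dropout probabilities defined elsewhere in its module. Plausible values, following the usual d2l dropout setup (an assumption, not the original definitions):

drop_prob1, drop_prob2 = 0.2, 0.5        # assumed dropout rates
num_hiddens1, num_hiddens2 = 256, 256    # assumed hidden-layer widths
num_outputs = 10                         # Fashion-MNIST has 10 classes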
Example #8
def main():
    batch_size = 256
    # Download and load the dataset
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    # Define and initialize the model
    net = nn.Sequential()
    net.add(nn.Dense(10))
    net.initialize(init.Normal(sigma=0.01))
    # Define the loss function
    loss = gloss.SoftmaxCrossEntropyLoss()
    # Define the optimization algorithm
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.01})
    num_epochs = 5
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, trainer)
Example #9
def vgg():
    """

    :return:
    """
    def vgg_block(num_convs, num_channels):
        blk = nn.Sequential()
        for _ in range(num_convs):
            blk.add(
                nn.Conv2D(num_channels,
                          kernel_size=3,
                          padding=1,
                          activation='relu'))
        blk.add(nn.MaxPool2D(pool_size=2, strides=2))
        return blk

    def vgg(conv_arch):
        net = nn.Sequential()
        # Convolutional layers
        for (num_convs, num_channels) in conv_arch:
            net.add(vgg_block(num_convs, num_channels))
        # Fully connected layers
        net.add(nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
                nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
                nn.Dense(10))
        return net

    conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))
    net = vgg(conv_arch)

    net.initialize()
    X = nd.random.uniform(shape=(1, 1, 224, 224))
    for blk in net:
        X = blk(X)
        print(blk.name, 'output shape:\t', X.shape)

    ratio = 4
    small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
    net = vgg(small_conv_arch)

    lr, num_epochs, batch_size, ctx = 0.05, 5, 128, d2l.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #10
def dropout_gluon():
    drop_prob1, drop_prob2, lr, batch_size, num_epochs = 0.2, 0.5, 0.1, 64, 50

    net = nn.Sequential()
    net.add(
        nn.Dense(256, activation="relu"),
        nn.Dropout(drop_prob1),  # add a dropout layer after the first fully connected layer
        nn.Dense(256, activation="relu"),
        nn.Dropout(drop_prob2),  # add a dropout layer after the second fully connected layer
        nn.Dense(10))
    net.initialize(init.Normal(sigma=0.01))

    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  None, None, trainer)
Example #11
def simple_multilayer():
    """
    Concise multilayer perceptron implementation with Gluon
    :return:
    """
    net = nn.Sequential()
    net.add(nn.Dense(256, activation='relu'), nn.Dense(10))
    net.add(gluon.nn.Dropout(0.2))
    net.initialize(init.Normal(sigma=0.01))

    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    loss = gloss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

    num_epochs = 10
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
Example #12
def nin():
    """

    :return:
    """
    def nin_block(num_channels, kernel_size, strides, padding):
        blk = nn.Sequential()
        blk.add(
            nn.Conv2D(num_channels,
                      kernel_size,
                      strides,
                      padding,
                      activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'),
            nn.Conv2D(num_channels, kernel_size=1, activation='relu'))
        return blk

    net = nn.Sequential()
    net.add(
        nin_block(96, kernel_size=11, strides=4, padding=0),
        nn.MaxPool2D(pool_size=3, strides=2),
        nin_block(256, kernel_size=5, strides=1, padding=2),
        nn.MaxPool2D(pool_size=3, strides=2),
        nin_block(384, kernel_size=3, strides=1, padding=1),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Dropout(0.5),
        # There are 10 label classes
        nin_block(10, kernel_size=3, strides=1, padding=1),
        # The global average pooling layer automatically sets the window shape to the input's height and width
        nn.GlobalAvgPool2D(),
        # Convert the four-dimensional output into a two-dimensional output of shape (batch size, 10)
        nn.Flatten())
    X = nd.random.uniform(shape=(1, 1, 224, 224))
    net.initialize()
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)

    lr, num_epochs, batch_size, ctx = 0.1, 5, 128, d2l.try_gpu()
    net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #13
def train(num_gpus, batch_size, lr):
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('running on:', ctx)
    # Copy the model parameters to the memory of num_gpus GPUs
    gpu_params = [get_params(params, c) for c in ctx]
    for epoch in range(4):
        start = time.time()
        for X, y in train_iter:
            # Multi-GPU training on a single mini-batch
            train_batch(X, y, gpu_params, ctx, lr)
            nd.waitall()
        train_time = time.time() - start

        def net(x):  # evaluate the model on gpu(0)
            return lenet(x, gpu_params[0])

        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test_acc %.2f' %
              (epoch + 1, train_time, test_acc))
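
This train() builds on helpers (get_params, train_batch, lenet and a module-level loss) that are not shown in the listing. A hedged sketch of train_batch and the gradient all-reduce it relies on, in the spirit of the standard d2l multi-GPU-from-scratch example (an assumption, not the original code):

def allreduce(data):
    # Sum the gradients from all devices onto device 0, then broadcast back
    for i in range(1, len(data)):
        data[0][:] += data[i].copyto(data[0].context)
    for i in range(1, len(data)):
        data[0].copyto(data[i])

def train_batch(X, y, gpu_params, ctx, lr):
    # Split the mini-batch across devices, compute losses on each device,
    # back-propagate, aggregate gradients, then apply SGD device by device.
    # `loss` is assumed to be a module-level gloss.SoftmaxCrossEntropyLoss().
    gpu_Xs, gpu_ys = gutils.split_and_load(X, ctx), gutils.split_and_load(y, ctx)
    with autograd.record():
        ls = [loss(lenet(gpu_X, gpu_W), gpu_y)
              for gpu_X, gpu_y, gpu_W in zip(gpu_Xs, gpu_ys, gpu_params)]
    for l in ls:
        l.backward()
    for i in range(len(gpu_params[0])):
        allreduce([gpu_params[c][i].grad for c in range(len(ctx))])
    for param in gpu_params:
        d2l.sgd(param, lr, X.shape[0])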
Example #14
def batchnormalization_simple():
    """
    The BatchNorm class defined in Gluon's nn module is simpler to use: it does not require the
    num_features and num_dims arguments needed by the BatchNorm class we defined ourselves. In Gluon,
    these values are obtained automatically through deferred initialization. Below we use Gluon to
    implement LeNet with batch normalization.
    :return:
    """

    net = nn.Sequential()
    net.add(nn.Conv2D(6, kernel_size=5), nn.BatchNorm(),
            nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(16, kernel_size=5), nn.BatchNorm(),
            nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
            nn.Dense(120), nn.BatchNorm(), nn.Activation('sigmoid'),
            nn.Dense(84), nn.BatchNorm(), nn.Activation('sigmoid'),
            nn.Dense(10))

    lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #15
def train(num_gpus, batch_size, lr):

    comm = MPI.COMM_WORLD
    comm_rank = comm.Get_rank()
    comm_size = comm.Get_size()

    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    #ctx = [mx.gpu(i) for i in range(num_gpus)]
    if comm_rank == 0:
        ctx = mx.gpu(0)
    else:
        ctx = mx.gpu(1)
    print('running on:', ctx)
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    # Note: SSP_FLAG and thre are not arguments of the standard gluon.Trainer; this example
    # assumes a Trainer modified to support stale-synchronous parallel (SSP) updates.
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': lr},
                            SSP_FLAG=True,
                            thre=2)
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(400000):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, [ctx])  # ctx is a single context here
            gpu_ys = gutils.split_and_load(y, [ctx])
            with autograd.record():
                ls = [
                    loss(net(gpu_X), gpu_y)
                    for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)
                ]
            for l in ls:
                l.backward()
            trainer.step(epoch, batch_size)  # the modified SSP trainer also takes the current epoch
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx)
        print('epoch %d, time %.1f sec, test acc %.2f, process %d' %
              (epoch + 1, train_time, test_acc, comm_rank))
Example #16
        print(len(self.parameters))
        for i, k in enumerate(keys):
            if i not in [30, 31]:
                sess.run(self.parameters[i].assign(weights[k]))
        print('-------------all done------------------')


x_imgs = tf.placeholder(tf.float32, [None, 96, 96, 1])
vgg = vgg16(x_imgs, 10)
y_imgs = tf.placeholder(tf.int32, [None, 10])
fc3_cat_and_dog = vgg.probs
batch_size = 500
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=fc3_cat_and_dog,
                                            labels=y_imgs))
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=0.01).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
train_iter, test_iter = d2l.load_data_fashion_mnist(200, resize=96)
epoch = 5
for i in range(epoch):
    j = 0
    for features, labels in train_iter:
        X_train = nd.transpose(features, axes=(0, 2, 3, 1)).asnumpy()
        y_train = labels.asnumpy()
        Y_train = np_utils.to_categorical(y_train, 10)
        sess.run(optimizer, feed_dict={x_imgs: X_train, y_imgs: Y_train})
        j = j + 1
        if j % 10 == 0:
            print(sess.run(loss, feed_dict={x_imgs: X_train, y_imgs: Y_train}))
Example #17
 @Author     : 剑怜情
'''
from mxnet.gluon import data as gdata
from mxnet.gluon import nn
import d2lzh as d2l
from mxnet import nd,autograd,gluon
from mxnet import initializer as init

mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)
print("mnist_train.shape:",len(mnist_train))
print("mnist_test.shape:",len(mnist_test))

# Wrap into data iterators
batch_size = 64
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Define W, b
num_input = 784
num_outputs = 10

# W = nd.random.normal(scale=1,shape=(num_input,num_outputs))
# b = nd.zeros(shape=num_outputs)

# W.attach_grad()
# b.attach_grad()

# Define the model, the softmax classifier, and the loss function
def softmax(X):
    x_exp = X.exp()
    partition = x_exp.sum(axis=1, keepdims=True)
    return x_exp / partition  # broadcasting divides each row by its sum
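
A quick, illustrative sanity check of the softmax above (not part of the original listing): each row of the output should be a probability distribution.

X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(axis=1))  # every row sums to 1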
Example #18
def method0():
    num_epochs, lr, batch_size = 5, 0.5, 256
    loss = gloss.SoftmaxCrossEntropyLoss()
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
                  params, lr)
Example #19
def googlenet():
    """
    GoogLeNet absorbed the idea of networks within networks from NiN and improved on it substantially.
    In the years that followed, researchers made several refinements to GoogLeNet; this section
    introduces the first version of this model series.
    :return:
    """
    class Inception(nn.Block):
        # c1 - c4 are the numbers of output channels for the layers in each path
        def __init__(self, c1, c2, c3, c4, **kwargs):
            super(Inception, self).__init__(**kwargs)
            # Path 1: a single 1 x 1 convolutional layer
            self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
            # Path 2: a 1 x 1 convolutional layer followed by a 3 x 3 convolutional layer
            self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
            self.p2_2 = nn.Conv2D(c2[1],
                                  kernel_size=3,
                                  padding=1,
                                  activation='relu')
            # Path 3: a 1 x 1 convolutional layer followed by a 5 x 5 convolutional layer
            self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
            self.p3_2 = nn.Conv2D(c3[1],
                                  kernel_size=5,
                                  padding=2,
                                  activation='relu')
            # Path 4: a 3 x 3 max pooling layer followed by a 1 x 1 convolutional layer
            self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
            self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

        def forward(self, x):
            p1 = self.p1_1(x)
            p2 = self.p2_2(self.p2_1(x))
            p3 = self.p3_2(self.p3_1(x))
            p4 = self.p4_2(self.p4_1(x))
            return nd.concat(p1, p2, p3, p4, dim=1)  # concatenate the outputs on the channel dimension

    b1 = nn.Sequential()
    b1.add(
        nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    b2 = nn.Sequential()
    b2.add(nn.Conv2D(64, kernel_size=1, activation='relu'),
           nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),
           nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    b3 = nn.Sequential()
    b3.add(Inception(64, (96, 128), (16, 32), 32),
           Inception(128, (128, 192), (32, 96), 64),
           nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    b4 = nn.Sequential()
    b4.add(Inception(192, (96, 208), (16, 48), 64),
           Inception(160, (112, 224), (24, 64), 64),
           Inception(128, (128, 256), (24, 64), 64),
           Inception(112, (144, 288), (32, 64), 64),
           Inception(256, (160, 320), (32, 128), 128),
           nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    b5 = nn.Sequential()
    b5.add(Inception(256, (160, 320), (32, 128), 128),
           Inception(384, (192, 384), (48, 128), 128), nn.GlobalAvgPool2D())

    net = nn.Sequential()
    net.add(b1, b2, b3, b4, b5, nn.Dense(10))

    X = nd.random.uniform(shape=(1, 1, 96, 96))
    net.initialize()
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)

    lr, num_epochs, batch_size, ctx = 0.1, 5, 128, d2l.try_gpu()
    net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #20
# DLN is free software; you can redistribute it and/or modify it.
#
# DLN is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Contributors:
#     Xiao Wang - initial implementation

import d2lzh as d2l
from mxnet import nd
from mxnet.gluon import loss as gloss

# load data
batch_size = 256
iter_trainning, iter_testing = d2l.load_data_fashion_mnist(batch_size)

# initialize parameters
number_inputs = 784
number_outputs = 10
number_hiddens = 256

W1 = nd.random.normal(scale=0.01, shape=(number_inputs, number_hiddens))
b1 = nd.zeros(number_hiddens)

W2 = nd.random.normal(scale=0.01, shape=(number_hiddens, number_outputs))
b2 = nd.zeros(number_outputs)

parameters = [W1, b1, W2, b2]

for parameter in parameters:
    parameter.attach_grad()  # allocate a gradient buffer for each parameter
Example #21
class Test5:
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

    def try_gpu(self):  # This function is saved in the d2lzh package for convenient later use
        try:
            ctx = mx.gpu()
            _ = nd.zeros((1, ), ctx=ctx)
        except mx.base.MXNetError:
            ctx = mx.cpu()
        return ctx

    # Evaluate the classification accuracy of net on data_iter using context ctx
    def evaluate_accuracy(data_iter, net, ctx):
        acc_sum, n = nd.array([0], ctx=ctx), 0
        for X, y in data_iter:
            # If ctx is a GPU (and its memory), copy the data to the GPU
            X, y = X.as_in_context(ctx), y.as_in_context(ctx).astype('float32')
            acc_sum += (net(X).argmax(axis=1) == y).sum()
            n += y.size
        return acc_sum.asscalar() / n

    def generate(self):
        net = nn.Sequential()
        net.add(
            nn.Conv2D(channels=6, kernel_size=5, activation='sigmoid'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=16, kernel_size=5, activation='sigmoid'),
            nn.MaxPool2D(pool_size=2, strides=2),
            # Dense by default transforms an input of shape (batch size, channels,
            # height, width) into one of shape (batch size, channels * height * width)
            nn.Dense(120, activation='sigmoid'),
            nn.Dense(84, activation='sigmoid'),
            nn.Dense(10))
        X = nd.random.uniform(shape=(1, 1, 28, 28))
        net.initialize()
        for layer in net:
            X = layer(X)
            print(layer.name, 'output shape:\t', X.shape)

    def train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs):
        print('training on', ctx)
        loss = gloss.SoftmaxCrossEntropyLoss()
        for epoch in range(num_epochs):
            train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
            for X, y in train_iter:
                X, y = X.as_in_context(ctx), y.as_in_context(ctx)
                with autograd.record():
                    y_hat = net(X)
                    l = loss(y_hat, y).sum()
                l.backward()
                trainer.step(batch_size)
                y = y.astype('float32')
                train_l_sum += l.asscalar()
                train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
                n += y.size
            test_acc = evaluate_accuracy(test_iter, net, ctx)
            print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, '
                  'time %.1f sec' %
                  (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc,
                   time.time() - start))

    def main(self):
        lr, num_epochs = 0.9, 5
        net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
        trainer = gluon.Trainer(net.collect_params(), 'sgd',
                                {'learning_rate': lr})
        train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #22
def batchnormalization():
    """
    Batch normalization uses the mean and standard deviation over a mini-batch to continually adjust
    the intermediate outputs of the network, making the values of those intermediate outputs more
    stable across the layers of the network.
    :return:
    """
    def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
        # Use autograd to determine whether we are in training or prediction mode
        if not autograd.is_training():
            # In prediction mode, use the mean and variance obtained from the moving averages
            X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
        else:
            assert len(X.shape) in (2, 4)
            if len(X.shape) == 2:
                # For a fully connected layer, compute the mean and variance over the feature dimension
                mean = X.mean(axis=0)
                var = ((X - mean)**2).mean(axis=0)
            else:
                # For a two-dimensional convolutional layer, compute the mean and variance over the
                # channel dimension (axis=1). We keep the shape of X so broadcasting works later
                mean = X.mean(axis=(0, 2, 3), keepdims=True)
                var = ((X - mean)**2).mean(axis=(0, 2, 3), keepdims=True)
            # In training mode, standardize with the current mean and variance
            X_hat = (X - mean) / nd.sqrt(var + eps)
            # Update the moving-average mean and variance
            moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
            moving_var = momentum * moving_var + (1.0 - momentum) * var
        Y = gamma * X_hat + beta  # scale and shift
        return Y, moving_mean, moving_var

    class BatchNorm(nn.Block):
        def __init__(self, num_features, num_dims, **kwargs):
            super(BatchNorm, self).__init__(**kwargs)
            if num_dims == 2:
                shape = (1, num_features)
            else:
                shape = (1, num_features, 1, 1)
            # Scale and shift parameters involved in gradients and iteration, initialized to 1 and 0
            self.gamma = self.params.get('gamma', shape=shape, init=init.One())
            self.beta = self.params.get('beta', shape=shape, init=init.Zero())
            # Variables not involved in gradients or iteration, initialized to 0 in (CPU) memory
            self.moving_mean = nd.zeros(shape)
            self.moving_var = nd.zeros(shape)

        def forward(self, X):
            # If X is not in (CPU) memory, copy moving_mean and moving_var to the device where X lives
            if self.moving_mean.context != X.context:
                self.moving_mean = self.moving_mean.copyto(X.context)
                self.moving_var = self.moving_var.copyto(X.context)
            # Save the updated moving_mean and moving_var
            Y, self.moving_mean, self.moving_var = batch_norm(
                X,
                self.gamma.data(),
                self.beta.data(),
                self.moving_mean,
                self.moving_var,
                eps=1e-5,
                momentum=0.9)
            return Y

    net = nn.Sequential()
    net.add(nn.Conv2D(6, kernel_size=5), BatchNorm(6, num_dims=4),
            nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(16, kernel_size=5), BatchNorm(16, num_dims=4),
            nn.Activation('sigmoid'), nn.MaxPool2D(pool_size=2, strides=2),
            nn.Dense(120), BatchNorm(120, num_dims=2),
            nn.Activation('sigmoid'), nn.Dense(84), BatchNorm(84, num_dims=2),
            nn.Activation('sigmoid'), nn.Dense(10))

    lr, num_epochs, batch_size, ctx = 1.0, 5, 256, d2l.try_gpu()
    net.initialize(ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)

    # Inspect the learned scale (gamma) and shift (beta) of the first BatchNorm layer
    print(net[1].gamma.data().reshape((-1,)), net[1].beta.data().reshape((-1,)))
Example #23
import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.SoftmaxCELoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None,
              trainer)
for x, y in test_iter:
    break
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(x).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(x[0:9], titles[0:9])
Example #24
def resnet():
    """
    Residual blocks, with their cross-layer data paths, make it possible to train effective deep neural networks.
    :return:
    """
    class Residual(nn.Block):  # This class is saved in the d2lzh package for convenient later use
        def __init__(self,
                     num_channels,
                     use_1x1conv=False,
                     strides=1,
                     **kwargs):
            super(Residual, self).__init__(**kwargs)
            self.conv1 = nn.Conv2D(num_channels,
                                   kernel_size=3,
                                   padding=1,
                                   strides=strides)
            self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
            if use_1x1conv:
                self.conv3 = nn.Conv2D(num_channels,
                                       kernel_size=1,
                                       strides=strides)
            else:
                self.conv3 = None
            self.bn1 = nn.BatchNorm()
            self.bn2 = nn.BatchNorm()

        def forward(self, X):
            Y = nd.relu(self.bn1(self.conv1(X)))
            Y = self.bn2(self.conv2(Y))
            if self.conv3:
                X = self.conv3(X)
            return nd.relu(Y + X)

    def resnet_block(num_channels, num_residuals, first_block=False):
        blk = nn.Sequential()
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
            else:
                blk.add(Residual(num_channels))
        return blk

    net = nn.Sequential()
    net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3), nn.BatchNorm(),
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=3, strides=2, padding=1))

    net.add(resnet_block(64, 2, first_block=True), resnet_block(128, 2),
            resnet_block(256, 2), resnet_block(512, 2))

    net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

    X = nd.random.uniform(shape=(1, 1, 224, 224))
    net.initialize()
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)

    lr, num_epochs, batch_size, ctx = 0.05, 5, 256, d2l.try_gpu()
    net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
    d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
                  num_epochs)
Example #25
import d2lzh
from mxnet import gluon, init, nd
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = d2lzh.load_data_fashion_mnist(batch_size)
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})
num_epoch = 5
d2lzh.train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size, None,
                None, trainer)

Example #26
class MLP:
    batch_size = 256

    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
                      dtype=torch.float)
    b1 = torch.zeros(num_hiddens, dtype=torch.float)
    W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
                      dtype=torch.float)
    b2 = torch.zeros(num_outputs, dtype=torch.float)
    params = [W1, b1, W2, b2]
    for param in params:
        param.requires_grad_(requires_grad=True)
    loss = torch.nn.CrossEntropyLoss()
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

    @staticmethod
    def relu(X):
        '''
        Activation function, defined by ourselves; nowadays ReLU is the more common choice
        '''
        return torch.max(input=X, other=torch.tensor(0.0))

    def net(self, X):
        X = X.view((-1, self.num_inputs))
        H = self.relu(torch.matmul(X, self.W1) + self.b1)
        return torch.matmul(H, self.W2) + self.b2

    def train(self):
        num_epochs, lr = 5, 100.0
        d2l.train_ch3(self.net, self.train_iter, self.test_iter, self.loss,
                      num_epochs, self.batch_size, self.params, lr)
        pass

    @staticmethod
    def xyplot(x_vals, y_vals, name):
        d2l.set_figsize(figsize=(5, 2.5))
        d2l.plt.plot(x_vals.detach().numpy(), y_vals.detach().numpy())
        d2l.plt.xlabel('x')
        d2l.plt.ylabel(name + '(x)')
        plt.show()
        plt.close()

    @staticmethod
    def testxplot(mtd=0):
        """
        H=ϕ(XWh+bh),
        O=HWo+bo,
​       其中ϕ就是激活函数{rele|sigmoid|tanh等}


        """
        x = torch.arange(-8.0, 8.0, 0.1, requires_grad=True)

        switcher = {
            0: x.relu,  # piecewise linear: max(x, 0)
            1: x.sigmoid,  # maps to the interval (0, 1)
            2: x.tanh,  # maps to the interval (-1, 1)
        }
        if switcher.get(mtd):
            y = switcher.get(mtd)()
            MLP.xyplot(x, y, 'relu')
            y.sum().backward()
            MLP.xyplot(x, x.grad, 'grad of relu')
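
A hedged usage sketch for the class above (assuming matplotlib.pyplot is imported as plt in the original module):

mlp = MLP()
MLP.testxplot()  # plot ReLU and its gradient
mlp.train()      # train the from-scratch MLP for 5 epochs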
Example #27
def resnet_block(num_channels, num_residuals, first_block=False):
    blk = nn.Sequential()
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.add(Residual(num_channels, use_1x1conv=True, strides=2))
        else:
            blk.add(Residual(num_channels))
    return blk

net.add(resnet_block(64, 2, first_block=True),
        resnet_block(128, 2),
        resnet_block(256, 2),
        resnet_block(512, 2))

net.add(nn.GlobalAvgPool2D(), nn.Dense(10))

X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)

lr, num_epochs, batch_size, ctx = 0.05, 5, 256, d2l.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx,
              num_epochs)

Example #28
class EasyMLP:
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    net = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    )
    for params in net.parameters():
        init.normal_(params, mean=0, std=0.01)
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    loss = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

    num_epochs = 5

    @staticmethod
    def sgd(params, lr, batch_size):
        # To stay consistent with the original book we divide by batch_size here, though strictly
        # it is unnecessary: PyTorch loss functions already average over the batch dimension by default.
        for param in params:
            param.data -= lr * param.grad / batch_size  # note: update through param.data, not param

    @staticmethod
    def evaluate_accuracy(data_iter, net, device=None):
        if device is None and isinstance(net, torch.nn.Module):
            # If no device is specified, use the device of net's parameters
            device = list(net.parameters())[0].device
        acc_sum, n = 0.0, 0
        with torch.no_grad():
            for X, y in data_iter:
                if isinstance(net, torch.nn.Module):
                    net.eval()  # evaluation mode; this turns off dropout
                    acc_sum += (net(X.to(device)).argmax(
                        dim=1) == y.to(device)).float().sum().cpu().item()
                    net.train()  # switch back to training mode
                else:  # a custom model (not used after Section 3.13); GPU not considered
                    if ('is_training'
                            in net.__code__.co_varnames):  # if net takes an is_training argument
                        # set is_training to False
                        acc_sum += (net(X, is_training=False).argmax(
                            dim=1) == y).float().sum().item()
                    else:
                        acc_sum += (net(X).argmax(
                            dim=1) == y).float().sum().item()
                n += y.shape[0]
        return acc_sum / n

    def train(self):
        # d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
        for epoch in range(self.num_epochs):
            train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
            for X, y in self.train_iter:
                y_hat = self.net(X)
                l = self.loss(y_hat, y).sum()

                # Zero the gradients
                if self.optimizer is not None:
                    self.optimizer.zero_grad()
                elif self.params is not None and self.params[
                        0].grad is not None:
                    for param in self.params:
                        param.grad.data.zero_()

                l.backward()
                if self.optimizer is None:
                    self.sgd(self.params, self.lr, self.batch_size)
                else:
                    self.optimizer.step()  # used in the "Concise Implementation of Softmax Regression" section

                train_l_sum += l.item()
                train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
                n += y.shape[0]
            test_acc = self.evaluate_accuracy(self.test_iter, self.net)
            print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' %
                  (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))