def __init__(self, input_channel, output_channel, mid_channel, ksize, stride, block_mode='ShuffleNetV2', fix_arch=True, bn=nn.BatchNorm, act_name='relu', use_se=False, **kwargs):
    """Build one ShuffleNet unit (regular or down-sample block).

    Parameters
    ----------
    input_channel, output_channel : int
        Block input / output channel counts.
    mid_channel : int
        Intermediate channel count of the main branch (this is the quantity
        searched by channel search in Single-Path One-Shot NAS).
    ksize : int
        Depthwise kernel size; one of 3 / 5 / 7.
    stride : int
        1 = regular block (input split into two halves);
        2 = down-sample block (whole input sent to both branches).
    block_mode : str
        'ShuffleNetV2' (pw-dw-pw) or 'ShuffleXception' (3 x (dw-pw)).
    fix_arch : bool
        True = fixed architecture; False = searchable supernet
        (inserts ChannelSelector after each mid-channel pointwise conv).
    bn : Block constructor
        Normalization layer class (default nn.BatchNorm).
    act_name : str
        Activation name forwarded to the project's `Activation` wrapper.
    use_se : bool
        Append a squeeze-excitation module to the main branch.
    """
    super(ShuffleNetBlock, self).__init__()
    assert stride in [1, 2]
    assert ksize in [3, 5, 7]
    assert block_mode in ['ShuffleNetV2', 'ShuffleXception']
    self.stride = stride
    self.ksize = ksize
    self.padding = self.ksize // 2  # "same" padding for the depthwise conv
    self.block_mode = block_mode
    self.input_channel = input_channel
    self.output_channel = output_channel
    # project_input_C == project_mid_C == project_output_C == main_input_channel
    self.project_channel = input_channel // 2 if stride == 1 else input_channel
    # stride 1: the input will be split in half
    self.main_input_channel = input_channel // 2 if stride == 1 else input_channel
    self.main_mid_channel = mid_channel
    self.main_output_channel = output_channel - self.project_channel
    self.fix_arch = fix_arch
    with self.name_scope():
        """
        Regular block: (we usually have the down-sample block first, then repeated regular blocks)
        Input[64] -> split two halves -> main branch: [32] --> mid_channels (final_output_C[64] // 2 * scale[1.4])
                       |                        |--> main_out_C[32] (final_out_C (64) - input_C[32])
                       |
                       |-----> project branch: [32], do nothing on this half
        Concat two copies: [64 - 32] + [32] --> [64] for final output channel
        =====================================================================
        In the "Single path one shot NAS" paper, channel search selects the
        main-branch intermediate channel count, controlled by a scale in
        (0.2, 2.0):  mid channel = block final output channel // 2 * scale,
        which guarantees main mid channel < final output channel.
        """
        self.channel_shuffle_and_split = ShuffleChannels(
            mid_channel=input_channel // 2, groups=2)
        # NAS variant uses a sequential that understands channel masks
        self.main_branch = nn.HybridSequential(
        ) if fix_arch else NasBaseHybridSequential()
        if block_mode == 'ShuffleNetV2':
            self.main_branch.add(
                # pw
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_input_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False))
            if not fix_arch:
                # searchable: mask channels above the sampled width
                self.main_branch.add(
                    ChannelSelector(channel_number=self.main_mid_channel))
            self.main_branch.add(
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                Activation(act_name),
                # dw with linear output
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_mid_channel,
                          kernel_size=self.ksize, strides=self.stride,
                          padding=self.padding, groups=self.main_mid_channel,
                          use_bias=False),
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                # pw
                nn.Conv2D(self.main_output_channel,
                          in_channels=self.main_mid_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False),
                bn(in_channels=self.main_output_channel, momentum=0.1),
                Activation(act_name))
        elif block_mode == 'ShuffleXception':
            self.main_branch.add(
                # dw with linear output (carries the block's stride)
                nn.Conv2D(self.main_input_channel,
                          in_channels=self.main_input_channel,
                          kernel_size=self.ksize, strides=self.stride,
                          padding=self.padding,
                          groups=self.main_input_channel, use_bias=False),
                bn(in_channels=self.main_input_channel, momentum=0.1),
                # pw
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_input_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False))
            if not fix_arch:
                self.main_branch.add(
                    ChannelSelector(channel_number=self.main_mid_channel))
            self.main_branch.add(
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                Activation(act_name),
                # dw with linear output (stride 1: only the first dw downsamples)
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_mid_channel,
                          kernel_size=self.ksize, strides=1,
                          padding=self.padding, groups=self.main_mid_channel,
                          use_bias=False),
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                # pw
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_mid_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False))
            if not fix_arch:
                self.main_branch.add(
                    ChannelSelector(channel_number=self.main_mid_channel))
            self.main_branch.add(
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                Activation(act_name),
                # dw with linear output
                nn.Conv2D(self.main_mid_channel,
                          in_channels=self.main_mid_channel,
                          kernel_size=self.ksize, strides=1,
                          padding=self.padding, groups=self.main_mid_channel,
                          use_bias=False),
                bn(in_channels=self.main_mid_channel, momentum=0.1),
                # pw
                nn.Conv2D(self.main_output_channel,
                          in_channels=self.main_mid_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False),
                bn(in_channels=self.main_output_channel, momentum=0.1),
                Activation(act_name))
        if use_se:
            self.main_branch.add(SE(self.main_output_channel))
        if self.stride == 2:
            """
            Down-sample block:
            Input[16] -> two copies -> main branch: [16] --> mid_channels (final_output_C[64] // 2 * scale[1.4])
                           |                    |--> main_out_C[48] (final_out_C (64) - input_C[16])
                           |
                           |-----> project branch: [16] --> project_mid_C[16] --> project_out_C[16]
            Concat two copies: [64 - 16] + [16] --> [64] for final output channel
            """
            self.proj_branch = nn.HybridSequential()
            self.proj_branch.add(
                # dw with linear output
                nn.Conv2D(self.project_channel,
                          in_channels=self.project_channel,
                          kernel_size=self.ksize, strides=stride,
                          padding=self.padding, groups=self.project_channel,
                          use_bias=False),
                bn(in_channels=self.project_channel, momentum=0.1),
                # pw
                nn.Conv2D(self.project_channel,
                          in_channels=self.project_channel, kernel_size=1,
                          strides=1, padding=0, use_bias=False),
                bn(in_channels=self.project_channel, momentum=0.1),
                Activation(act_name))
# Transition layer: BN -> ReLU -> 1x1 conv (reduce channels) -> 2x2 avg-pool
# (halve the spatial resolution).
def transition_block(num_channels):
    """Return a DenseNet transition block that outputs `num_channels` channels."""
    blk = nn.Sequential()
    blk.add(nn.BatchNorm(), nn.Activation('relu'),
            nn.Conv2D(num_channels, kernel_size=1),
            nn.AvgPool2D(pool_size=2, strides=2))
    return blk


# quick shape check on the previous dense block's output Y
blk = transition_block(10)
blk.initialize()
print(blk(Y).shape)

# DenseNet model
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))
num_channels, growth_rate = 64, 32  # current channel count / per-conv growth
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, growth_rate))
    # channel count after the previous dense block
    num_channels += num_convs * growth_rate
    # between dense blocks, add a transition layer that halves the channels
    if i != len(num_convs_in_dense_blocks) - 1:
        num_channels //= 2
        net.add(transition_block(num_channels))
# BUGFIX: the original chunk ended mid-statement
# ("net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(),")
# leaving a syntax error; completed with the standard d2l DenseNet head.
net.add(nn.BatchNorm(), nn.Activation('relu'), nn.GlobalAvgPool2D(),
        nn.Dense(10))
def conv_block(num_channels):
    """Pre-activation DenseNet unit: BN -> ReLU -> 3x3 same-padded conv."""
    blk = nn.Sequential()
    blk.add(nn.BatchNorm())
    blk.add(nn.Activation('relu'))
    blk.add(nn.Conv2D(num_channels, kernel_size=3, padding=1))
    return blk
def class_predictor(num_anchors, num_classes):
    """3x3 conv emitting per-anchor class scores (+1 for the background class)."""
    outputs_per_cell = num_anchors * (num_classes + 1)
    return nn.Conv2D(outputs_per_cell, 3, padding=1)
#coding:utf-8 from mxnet.gluon import nn import sys sys.path.append('..') import utils from mxnet import autograd from mxnet import gluon from mxnet import nd net = nn.Sequential() with net.name_scope(): # 第一层卷积 net.add(nn.Conv2D(channels=20, kernel_size=5)) ### 添加了批量归一化层 net.add(nn.BatchNorm(axis=1)) net.add(nn.Activation(activation='relu')) net.add(nn.MaxPool2D(pool_size=2, strides=2)) # 第二层卷积 net.add(nn.Conv2D(channels=50, kernel_size=3)) ### 添加了批量归一化层 net.add(nn.BatchNorm(axis=1)) net.add(nn.Activation(activation='relu')) net.add(nn.MaxPool2D(pool_size=2, strides=2)) net.add(nn.Flatten()) # 第一层全连接 net.add(nn.Dense(128, activation="relu")) # 第二层全连接 net.add(nn.Dense(10)) ctx = utils.try_gpu() net.initialize(ctx=ctx)
# Resize images to 224x224 and convert to NCHW float tensors.
transformer = [
    gdata.vision.transforms.Resize(224),
    gdata.vision.transforms.ToTensor()
]
transformer = gdata.vision.transforms.Compose(transformer)
train_iter = gdata.DataLoader(train_data.transform_first(transformer),
                              batch_size=batch_size, shuffle=True)
test_iter = gdata.DataLoader(test_data.transform_first(transformer),
                             batch_size=batch_size, shuffle=False)
# Define the model (AlexNet-style, 10-class head).
# NOTE(review): the first pool uses strides=3 where classic AlexNet uses 2 —
# confirm this is intentional.
net = nn.Sequential()
net.add(nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=3),
        nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(10))
# X = nd.random.uniform(shape=(1,1,224,224))
# net.initialize()
# for layer in net:
#     X = layer(X)
def mobilenetv2_05(context, IMG_W, IMG_H):
    """Build a MobileNetV2-0.5 backbone with a 3-stage upsampling head.

    The head emits a 17-channel map (presumably 17 keypoint heatmaps —
    TODO confirm against the training code).

    Parameters
    ----------
    context : mx.Context
        Device to initialize the network on.
    IMG_W, IMG_H : int
        Input width/height used only for the summary dry-run.

    Returns
    -------
    gluon.nn.HybridSequential
        Initialized, hybridized network.
    """
    base_model = get_model('mobilenetv2_0.5', pretrained=True, ctx=context,
                           norm_layer=gcv.nn.BatchNormCudnnOff)
    net = gluon.nn.HybridSequential()
    with net.name_scope():
        # backbone trunk: drop the last 4 feature blocks
        net.add(base_model.features[:-4])
        net.add(nn.Conv2D(128, 1, strides=1, padding=0, use_bias=False))
        net.add(gcv.nn.BatchNormCudnnOff(scale=True))
        net.add(nn.Activation('relu'))
        # upsample head: the original repeated this stage verbatim three
        # times — deduplicated into a loop (identical layer sequence/order,
        # so auto-generated parameter names are unchanged).
        for _ in range(3):
            # depthwise transposed conv: 2x spatial upsampling
            net.add(nn.Conv2DTranspose(128, 4, strides=2, padding=1,
                                       groups=128, in_channels=128,
                                       use_bias=False))
            net.add(gcv.nn.BatchNormCudnnOff(scale=True))
            net.add(nn.Activation('relu'))
            # pointwise conv to mix channels
            net.add(nn.Conv2D(128, 1, strides=1, padding=0, use_bias=False))
            net.add(gcv.nn.BatchNormCudnnOff(scale=True))
            net.add(nn.Activation('relu'))
        # final 1x1 prediction conv
        net.add(nn.Conv2D(17, 1, strides=1, padding=0, use_bias=False))
    net.initialize(ctx=context)
    x = mx.nd.ones((1, 3, IMG_H, IMG_W), ctx=context)
    net.summary(x)
    net.hybridize(static_alloc=True, static_shape=True)
    return net
def __init__(self, classes=1000, width_mult=1.0, mode="large", **kwargs):
    """MobileNetV3 feature extractor.

    Parameters
    ----------
    classes : int
        Number of output classes (head construction only partially visible
        in this chunk).
    width_mult : float
        Channel width multiplier applied to every stage via make_divisible.
    mode : str
        "large" or "small" variant (per the MobileNetV3 paper's tables).
    """
    super(MobileNetV3, self).__init__()
    assert mode in ["large", "small"]
    setting = []
    last_channel = 1280
    input_channel = 16
    if mode == "large":
        setting = [
            # k, exp, c, se, nl, s, short_cut
            [3, 16, 16, False, 'RE', 1, False],
            [3, 64, 24, False, 'RE', 2, False],
            [3, 72, 24, False, 'RE', 1, True],
            [5, 72, 40, True, 'RE', 2, False],
            [5, 120, 40, True, 'RE', 1, True],
            [5, 120, 40, True, 'RE', 1, True],
            [3, 240, 80, False, 'HS', 2, False],
            [3, 200, 80, False, 'HS', 1, True],
            [3, 184, 80, False, 'HS', 1, True],
            [3, 184, 80, False, 'HS', 1, True],
            [3, 480, 112, True, 'HS', 1, False],
            [3, 672, 112, True, 'HS', 1, True],
            [5, 672, 112, True, 'HS', 1, True],
            [5, 672, 160, True, 'HS', 2, False],
            [5, 960, 160, True, 'HS', 1, True],
        ]
    else:
        setting = [
            # k, exp, c, se, nl, s,
            [3, 16, 16, True, 'RE', 2, False],
            [3, 72, 24, False, 'RE', 2, False],
            [3, 88, 24, False, 'RE', 1, True],
            [5, 96, 40, True, 'HS', 2, False],  # stride = 2, paper set it to 1 by error
            [5, 240, 40, True, 'HS', 1, True],
            [5, 240, 40, True, 'HS', 1, True],
            [5, 120, 48, True, 'HS', 1, False],
            [5, 144, 48, True, 'HS', 1, True],
            [5, 288, 96, True, 'HS', 2, False],
            [5, 576, 96, True, 'HS', 1, True],
            [5, 576, 96, True, 'HS', 1, True],
        ]
    self.last_channel = make_divisible(
        last_channel * width_mult) if width_mult > 1.0 else last_channel
    # stem: 3x3 stride-2 conv + BN + h-swish
    self.layers = [conv_bn(input_channel, 3, 2, activation=HSwish())]
    for kernel_size, exp, channel, se, act, s, short_cut in setting:
        output_channel = make_divisible(channel * width_mult)
        exp_channel = make_divisible(exp * width_mult)
        self.layers.append(
            MobileBottleNeck(output_channel, kernel_size, s, exp_channel,
                             se, short_cut, act))
    if mode == "large":
        last_conv = make_divisible(960 * width_mult)
        # BUGFIX: was conv_1x1_bn(last_channel, ...). `last_conv` was computed
        # but never used, so the pre-pool conv ignored width_mult and produced
        # 1280 channels instead of the paper's 960 * width_mult.
        self.layers.append(conv_1x1_bn(last_conv, HSwish()))
        self.layers.append(AdaptiveAvgPool2D(output_size=1))
        self.layers.append(HSwish())
        self.layers.append(nn.Conv2D(last_channel, 1, 1, 0))
        self.layers.append(HSwish())
    else:
        last_conv = make_divisible(576 * width_mult)
        # BUGFIX: same unused-`last_conv` defect — the pre-pool conv and the
        # following SE module must use 576 * width_mult, not last_channel.
        self.layers.append(conv_1x1_bn(last_conv, HSwish()))
        self.layers.append(SEModule(last_conv))
        self.layers.append(AdaptiveAvgPool2D(output_size=1))
        self.layers.append(HSwish())
        self.layers.append(conv_1x1_bn(last_channel, HSwish()))
    self._layers = nn.HybridSequential()
    self._layers.add(*self.layers)
def test_deferred_init():
    """Gluon deferred init: params get shapes on the first forward call."""
    data = mx.nd.ones((5, 4, 10, 10))
    conv = nn.Conv2D(10, 2)
    conv.collect_params().initialize()
    conv(data)
def conv_bn(channels, filter_size, stride, activation=None):
    """Conv -> BN -> activation block.

    Fixes two defects in the original:
    - `filter_size` was accepted but silently ignored (kernel hard-coded to 3);
      padding is now filter_size // 2 to preserve "same" spatial behavior.
    - the default argument `activation=nn.Activation('relu')` created one
      shared layer instance at import time; a fresh ReLU is now created per
      call when no activation is supplied.

    Parameters
    ----------
    channels : int
        Output channel count.
    filter_size : int
        Square kernel size.
    stride : int
        Convolution stride.
    activation : Block or None
        Activation layer; defaults to a new nn.Activation('relu').
    """
    if activation is None:
        activation = nn.Activation('relu')
    out = nn.HybridSequential()
    out.add(nn.Conv2D(channels, filter_size, stride, filter_size // 2,
                      use_bias=False),
            nn.BatchNorm(scale=True),
            activation)
    return out
def conv_1x1_bn(channels, activation=None):
    """1x1 conv -> BN -> activation block.

    Fix: the original default argument `activation=nn.Activation('relu')`
    instantiated a single shared layer at import time (mutable-default
    anti-pattern); a fresh ReLU is now created per call instead.

    Parameters
    ----------
    channels : int
        Output channel count of the pointwise convolution.
    activation : Block or None
        Activation layer; defaults to a new nn.Activation('relu').
    """
    if activation is None:
        activation = nn.Activation('relu')
    out = nn.HybridSequential()
    out.add(nn.Conv2D(channels, 1, 1, 0, use_bias=False),
            nn.BatchNorm(scale=True),
            activation)
    return out
#FCN实现了图像像素到像素类别的变换 #全卷积通过转置卷积层将中间层特征图的高宽变换为输入图像的高宽 get_ipython().run_line_magic('matplotlib', 'inline') import d2lzh as d2l from mxnet import gluon, image, init, nd from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn import numpy as np import sys # In[ ]: #定义卷积核的运算 X = nd.arange(1, 17).reshape((1, 1, 4, 4)) K = nd.arange(1, 10).reshape((1, 1, 3, 3)) conv = nn.Conv2D(channels=1, kernel_size=3) conv.initialize(init.Constant(K)) conv(X), K # In[ ]: #改写卷积核为含有大量含零的稀疏矩阵 W, k = nd.zeros((4, 16)), nd.zeros(11) k[:3], k[4:7], k[8:] = K[0, 0, 0, :], K[0, 0, 1, :], K[0, 0, 2, :] W[0, 0:11], W[1, 1:12], W[2, 4:15], W[3, 5:16] = k, k, k, k nd.dot(W, X.reshape(16)).reshape((1, 1, 2, 2)), W # In[ ]: #卷积层修改通道数,通过设置步长使高宽缩小一半 conv = nn.Conv2D(10, kernel_size=4, padding=1, strides=2)
# --- tail of the DCGAN generator (netG was created above this chunk) ---
netG.add(nn.BatchNorm())
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 16 x 16
netG.add(nn.Conv2DTranspose(ngf, 4, 2, 1, use_bias=False))
netG.add(nn.BatchNorm())
netG.add(nn.Activation('relu'))
# state size. (ngf*8) x 32 x 32
netG.add(nn.Conv2DTranspose(nc, 4, 2, 1, use_bias=False))
netG.add(nn.Activation('tanh'))  # tanh keeps outputs in [-1, 1]
# state size. (nc) x 64 x 64

# build the discriminator
netD = nn.Sequential()
with netD.name_scope():
    # input is (nc) x 64 x 64
    netD.add(nn.Conv2D(ndf, 4, 2, 1, use_bias=False))
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 32 x 32
    netD.add(nn.Conv2D(ndf * 2, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 16 x 16
    netD.add(nn.Conv2D(ndf * 4, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 8 x 8
    netD.add(nn.Conv2D(ndf * 8, 4, 2, 1, use_bias=False))
    netD.add(nn.BatchNorm())
    netD.add(nn.LeakyReLU(0.2))
    # state size. (ndf) x 4 x 4 -> two logits (real/fake as 2-class output)
    netD.add(nn.Conv2D(2, 4, 1, 0, use_bias=False))
netG.add(nn.Activation('relu')) # output size. (ngf*8) x 32 x 32 netG.add(nn.Conv2DTranspose(nc, 4, 2, 1)) netG.add( nn.Activation('tanh') ) # use tanh , we need an output that is between -1 to 1, not 0 to 1 # Rememeber the input image is normalised between -1 to 1, so should be the output # output size. (nc) x 64 x 64 # build the discriminator model ndf = 64 netD = nn.Sequential() # maps with netD.name_scope(): # input is (nc) x 64 x 64 netD.add(nn.Conv2D(ndf, 4, 2, 1)) netD.add(nn.LeakyReLU(0.2)) # output size. (ndf) x 32 x 32 netD.add(nn.Conv2D(ndf * 2, 4, 2, 1)) netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(0.2)) # output size. (ndf) x 16 x 16 netD.add(nn.Conv2D(ndf * 4, 4, 2, 1)) netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(0.2)) # output size. (ndf) x 8 x 8 netD.add(nn.Conv2D(ndf * 8, 4, 2, 1)) netD.add(nn.BatchNorm()) netD.add(nn.LeakyReLU(0.2)) # output size. (ndf) x 4 x 4 netD.add(nn.Conv2D(1, 4, 1, 0))
def _conv3x3(channels, stride, in_channels):
    """3x3 same-padded conv, bias-free (bias is folded into the BN that follows)."""
    return nn.Conv2D(channels,
                     kernel_size=3,
                     strides=stride,
                     padding=1,
                     in_channels=in_channels,
                     use_bias=False)
X[:, 2:6] = 0 # print(X) # 构造卷积核 ,使得当左右相邻元素相同时,输出为0,若不一样时,输出为1,已达到边缘检测的效果! # 使用nd或者np构造矩阵时,一定不要忘记写入两个[],而不是因为是横向量或是竖向量而只写一个[] K = nd.array([[1, -1]]) Y = corr2d(X, K) # print(Y) # 【使用gluon完成卷积运算】 # 构造一个输出通道为1,核形状为(1,2)的二维卷积层 # 定义模型 conv2d = nn.Conv2D(1, kernel_size=(1, 2)) # 初始化模型函数 conv2d.initialize() # 二维卷积层使用4维输入输出,格式为(样本,通道,高,宽),这里的批量大小和通道数均为1 X = X.reshape((1, 1, 6, 8)) Y = Y.reshape((1, 1, 6, 7)) num_epochs = 10 for epoch in range(num_epochs): with autograd.record(): Y_hat = conv2d(X) l = (Y_hat - Y)**2 l.backward()
""" 5.2.1填充 """ from mxnet import nd from mxnet.gluon import nn # 定义一个函数来计算卷积层。它初始化卷积层权重,并对输入和输出做相应的升维和降维 def comp_conv2d(conv2d, X): conv2d.initialize() # (1, 1)代表批量大小和通道数 X = X.reshape((1, 1) + X.shape) Y = conv2d(X) return Y.reshape(Y.shape[2:]) conv2d = nn.Conv2D(1, kernel_size=3, padding=1) X = nd.random.uniform(shape=(8, 8)) print(comp_conv2d(conv2d, X)) print(comp_conv2d(conv2d, X).shape) print("=================================================================") conv2d = nn.Conv2D(1, kernel_size=(5, 3), padding=(2, 1)) print(comp_conv2d(conv2d, X).shape) """ 5.2.2步幅 """ conv2d = nn.Conv2D(1, kernel_size=3, padding=1, strides=2) print(comp_conv2d(conv2d, X).shape) conv2d = nn.Conv2D(1, kernel_size=(3, 5), padding=(0, 1), strides=2) print(comp_conv2d(conv2d, X).shape)
def __init__(self, askc_type, start_layer, layers, channels, classes,
             deep_stem, norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """CIFAR AFF-ResNet (pre-activation style).

    Parameters
    ----------
    askc_type : str
        Attentional feature-fusion variant forwarded to _make_layer.
    start_layer : int
        First stage index that uses `askc_type` fusion.
    layers : list of int
        Residual units per stage; len(layers) == len(channels) - 1.
    channels : list of int
        channels[0] is the stem width; channels[1:] the stage widths.
    classes : int
        Number of output classes.
    deep_stem : bool
        Use a 3-conv stem instead of a single 3x3 conv.
    """
    super(CIFARAFFResNet, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # parameter-free BN normalizes the raw input (pre-act ResNet trick)
        self.features.add(
            norm_layer(scale=False, center=False,
                       **({} if norm_kwargs is None else norm_kwargs)))
        # self.features.add(nn.Conv2D(channels[0], 3, 1, 1, use_bias=False))
        stem_width = channels[0] // 2
        if not deep_stem:
            self.features.add(
                nn.Conv2D(channels[0], 3, 1, 1, use_bias=False))
        else:
            # deep stem: conv-BN-relu x2 then a widening conv
            # NOTE(review): unlike AFFResNeXt's stem, no BN+ReLU follows the
            # last stem conv here — presumably fine in pre-act layout, but
            # worth confirming.
            self.features.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
            self.features.add(norm_layer(in_channels=stem_width))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
            self.features.add(norm_layer(in_channels=stem_width))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
        in_channels = channels[0]
        # residual stages: first keeps resolution, the rest stride by 2
        for i, num_layer in enumerate(layers):
            stride = 1 if i == 0 else 2
            self.features.add(
                self._make_layer(askc_type=askc_type,
                                 start_layer=start_layer,
                                 layers=num_layer,
                                 channels=channels[i + 1],
                                 in_channels=in_channels,
                                 stride=stride,
                                 stage_index=i + 1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            in_channels = channels[i + 1]
        # final BN-ReLU-pool-flatten head
        self.features.add(
            norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.GlobalAvgPool2D())
        self.features.add(nn.Flatten())
        self.output = nn.Dense(classes, in_units=in_channels)
def __init__(self, n_hidden, vocab_size, embed_dim, max_seq_length, **kwargs): super(korean_autospacing2, self).__init__(**kwargs) # 입력 시퀀스 길이 self.in_seq_len = max_seq_length # 출력 시퀀스 길이 self.out_seq_len = max_seq_length # GRU의 hidden 개수 self.n_hidden = n_hidden # 고유문자개수 self.vocab_size = vocab_size # max_seq_length self.max_seq_length = max_seq_length # 임베딩 차원수 self.embed_dim = embed_dim with self.name_scope(): self.embedding = nn.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim) self.conv_unigram = nn.Conv2D(channels=128, kernel_size=(1, self.embed_dim)) self.conv_bigram = nn.Conv2D(channels=128, kernel_size=(2, self.embed_dim), padding=(1, 0)) self.conv_trigram = nn.Conv2D(channels=64, kernel_size=(3, self.embed_dim), padding=(2, 0)) self.conv_forthgram = nn.Conv2D(channels=32, kernel_size=(4, self.embed_dim), padding=(3, 0)) self.conv_fifthgram = nn.Conv2D(channels=16, kernel_size=(5, self.embed_dim), padding=(4, 0)) # for reverse convolution self.conv_rev_bigram = nn.Conv2D(channels=128, kernel_size=(2, self.embed_dim), padding=(1, 0)) self.conv_rev_trigram = nn.Conv2D(channels=64, kernel_size=(3, self.embed_dim), padding=(2, 0)) self.conv_rev_forthgram = nn.Conv2D(channels=32, kernel_size=(4, self.embed_dim), padding=(3, 0)) self.conv_rev_fifthgram = nn.Conv2D(channels=16, kernel_size=(5, self.embed_dim), padding=(4, 0)) self.bi_gru = rnn.GRU(hidden_size=self.n_hidden, layout='NTC', bidirectional=True) # self.bi_gru = rnn.BidirectionalCell( # rnn.GRUCell(hidden_size=self.n_hidden), # rnn.GRUCell(hidden_size=self.n_hidden)) self.dense_sh = nn.Dense(100, activation='relu', flatten=False) self.dense = nn.Dense(1, activation='sigmoid', flatten=False)
def __init__(self, askc_type, start_layer, layers, classes=1000,
             dilated=False, norm_layer=BatchNorm, norm_kwargs=None,
             last_gamma=False, deep_stem=True, stem_width=32, avg_down=True,
             final_drop=0.0, use_global_stats=False, name_prefix='',
             **kwargs):
    """ImageNet-scale AFF-ResNet backbone.

    `askc_type`/`start_layer` select the attentional-feature-fusion variant
    per stage; `dilated=True` keeps spatial resolution by using dilation 2/4
    in stages 3/4 instead of striding.
    """
    self.inplanes = stem_width * 2 if deep_stem else 64
    super(AFFResNet, self).__init__(prefix=name_prefix)
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    if use_global_stats:
        # freeze BN statistics (useful for detection/segmentation fine-tuning)
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs
    with self.name_scope():
        if not deep_stem:
            # classic 7x7 stride-2 stem
            self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                                   padding=3, use_bias=False)
        else:
            # ResNet-v1b deep stem: three 3x3 convs
            self.conv1 = nn.HybridSequential(prefix='conv1')
            self.conv1.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                          padding=1, use_bias=False))
            self.conv1.add(
                norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
            self.conv1.add(
                norm_layer(in_channels=stem_width, **norm_kwargs))
            self.conv1.add(nn.Activation('relu'))
            self.conv1.add(
                nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
        self.bn1 = norm_layer(
            in_channels=64 if not deep_stem else stem_width * 2,
            **norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        self.layer1 = self._make_layer(askc_type, start_layer, 1,
                                       AFFBottleneck, 64, layers[0],
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        self.layer2 = self._make_layer(askc_type, start_layer, 2,
                                       AFFBottleneck, 128, layers[1],
                                       strides=2, avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        if dilated:
            # output stride 8: replace striding with dilation in stages 3/4
            self.layer3 = self._make_layer(askc_type, start_layer, 3,
                                           AFFBottleneck, 256, layers[2],
                                           strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(askc_type, start_layer, 4,
                                           AFFBottleneck, 512, layers[3],
                                           strides=1, dilation=4,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        else:
            self.layer3 = self._make_layer(askc_type, start_layer, 3,
                                           AFFBottleneck, 256, layers[2],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            self.layer4 = self._make_layer(askc_type, start_layer, 4,
                                           AFFBottleneck, 512, layers[3],
                                           strides=2, avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * AFFBottleneck.expansion,
                           units=classes)
def __init__(self):
    """Region Proposal Network head.

    A shared 3x3 conv followed by two 1x1 predictors over 9 anchors per
    location: 2 objectness scores and 4 box offsets per anchor.
    (Attribute creation order matters for Gluon's auto-generated parameter
    names — do not reorder.)
    """
    super(RPN, self).__init__()
    # shared feature conv with built-in ReLU
    self.conv1 = nn.Conv2D(256, 3, 1, 1, activation='relu')
    self.bn1 = nn.BatchNorm()
    # 9 anchors x 2 objectness classes
    self.class_predictor = nn.Conv2D(9 * 2, 1)
    # 9 anchors x 4 box-regression targets
    self.bbox_predictor = nn.Conv2D(9 * 4, 1)
def _make_layer(self, askc_type, start_layer, stage_index, block, planes,
                blocks, strides=1, dilation=1, avg_down=False,
                norm_layer=None, last_gamma=False):
    """Assemble one residual stage of `blocks` AFFBottleneck units.

    The first unit carries the stage's stride/dilation and (when shapes
    change) a downsample shortcut; stages before `start_layer` fall back to
    plain element-wise addition ('DirectAdd') instead of attentional fusion.
    Updates `self.inplanes` as a side effect.
    """
    if stage_index < start_layer:
        askc_type = 'DirectAdd'
    downsample = None
    if strides != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.HybridSequential(prefix='down%d_' % stage_index)
        with downsample.name_scope():
            if avg_down:
                # ResNet-v1d trick: average-pool, then 1x1 conv (stride 1)
                if dilation == 1:
                    downsample.add(
                        nn.AvgPool2D(pool_size=strides, strides=strides,
                                     ceil_mode=True,
                                     count_include_pad=False))
                else:
                    # dilated stage keeps resolution: no-op pool
                    downsample.add(
                        nn.AvgPool2D(pool_size=1, strides=1, ceil_mode=True,
                                     count_include_pad=False))
                downsample.add(
                    nn.Conv2D(channels=planes * block.expansion,
                              kernel_size=1, strides=1, use_bias=False))
                downsample.add(
                    norm_layer(in_channels=planes * block.expansion,
                               **self.norm_kwargs))
            else:
                # classic strided 1x1 projection
                downsample.add(
                    nn.Conv2D(channels=planes * block.expansion,
                              kernel_size=1, strides=strides,
                              use_bias=False))
                downsample.add(
                    norm_layer(in_channels=planes * block.expansion,
                               **self.norm_kwargs))
    layers = nn.HybridSequential(prefix='layers%d_' % stage_index)
    with layers.name_scope():
        # first unit: dilation 4 ramps through 2 (gridding mitigation)
        if dilation in (1, 2):
            layers.add(
                AFFBottleneck(askc_type, planes, strides, dilation=1,
                              downsample=downsample,
                              previous_dilation=dilation,
                              norm_layer=norm_layer,
                              norm_kwargs=self.norm_kwargs,
                              last_gamma=last_gamma))
        elif dilation == 4:
            layers.add(
                AFFBottleneck(askc_type, planes, strides, dilation=2,
                              downsample=downsample,
                              previous_dilation=dilation,
                              norm_layer=norm_layer,
                              norm_kwargs=self.norm_kwargs,
                              last_gamma=last_gamma))
        else:
            raise RuntimeError(
                "=> unknown dilation size: {}".format(dilation))
        self.inplanes = planes * block.expansion
        # remaining units: stride 1, full dilation
        for i in range(1, blocks):
            layers.add(
                AFFBottleneck(askc_type, planes, dilation=dilation,
                              previous_dilation=dilation,
                              norm_layer=norm_layer,
                              norm_kwargs=self.norm_kwargs,
                              last_gamma=last_gamma))
    return layers
def box_predictor(num_anchors):
    """3x3 conv regressing four box offsets for each anchor."""
    offsets_per_cell = 4 * num_anchors
    return nn.Conv2D(offsets_per_cell, 3, padding=1)
def __init__(self, askc_type, start_layer, layers, cardinality,
             bottleneck_width, classes=1000, last_gamma=False, use_se=False,
             deep_stem=True, avg_down=True, stem_width=64,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """AFF-ResNeXt backbone.

    `cardinality`/`bottleneck_width` are the ResNeXt group parameters;
    `askc_type`/`start_layer` select the attentional fusion variant per stage.
    """
    super(AFFResNeXt, self).__init__(**kwargs)
    self.cardinality = cardinality
    self.bottleneck_width = bottleneck_width
    channels = 64  # first stage width; doubled after each stage
    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        if not deep_stem:
            # classic 7x7 stride-2 stem
            self.features.add(
                nn.Conv2D(channels=64, kernel_size=7, strides=2, padding=3,
                          use_bias=False))
        else:
            # deep stem: three 3x3 convs with BN + ReLU between
            self.features.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=2,
                          padding=1, use_bias=False))
            self.features.add(
                norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=stem_width, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
            self.features.add(
                norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
            self.features.add(nn.Activation('relu'))
            self.features.add(
                nn.Conv2D(channels=stem_width * 2, kernel_size=3, strides=1,
                          padding=1, use_bias=False))
        self.features.add(
            norm_layer(**({} if norm_kwargs is None else norm_kwargs)))
        self.features.add(nn.Activation('relu'))
        self.features.add(nn.MaxPool2D(3, 2, 1))
        # residual stages; first keeps resolution and skips avg_down
        for i, num_layer in enumerate(layers):
            stride = 1 if i == 0 else 2
            self.features.add(
                self._make_layer(askc_type, start_layer, channels,
                                 num_layer, stride, last_gamma, use_se,
                                 False if i == 0 else avg_down, i + 1,
                                 norm_layer=norm_layer,
                                 norm_kwargs=norm_kwargs))
            channels *= 2
        self.features.add(nn.GlobalAvgPool2D())
        self.output = nn.Dense(classes)
def train():
    """Train a LeNet-style CNN on FashionMNIST for 10 epochs and export it.

    Relies on module-level `device` and `acc` helpers defined elsewhere in
    this file.
    """
    mnist_train = datasets.FashionMNIST(train=True)
    X, y = mnist_train[0]
    print('X shape: ', X.shape, 'X dtype', X.dtype, 'y:', y)
    # normalize with FashionMNIST mean/std
    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.13, 0.31)])
    mnist_train = mnist_train.transform_first(transformer)
    batch_size = 256
    train_data = gluon.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
    # sanity-check one batch's shapes
    for data, label in train_data:
        print(data.shape, label.shape)
        break
    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(
        mnist_valid.transform_first(transformer),
        batch_size=batch_size, num_workers=4)
    net = nn.HybridSequential()
    net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(120, activation="relu"),
            nn.Dense(84, activation="relu"),
            nn.Dense(10))
    net.initialize(init=init.Xavier(), ctx=device)
    net.hybridize()
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': 0.1})
    for epoch in range(10):
        train_loss, train_acc, valid_acc = 0., 0., 0.
        tic = time.time()
        for data, label in train_data:
            data = data.copyto(device)
            label = label.copyto(device)
            # forward + backward
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        # calculate validation accuracy
        for data, label in valid_data:
            data = data.copyto(device)
            label = label.copyto(device)
            valid_acc += acc(net(data), label)
        print("Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" % (
            epoch, train_loss/len(train_data), train_acc/len(train_data),
            valid_acc/len(valid_data), time.time()-tic))
    # export symbol + params for deployment
    net.export('net-trained', epoch=10)
def __init__(self, act_type, r, act_layers, layers, classes=1000,
             dilated=False, norm_layer=BatchNorm, norm_kwargs=None,
             last_gamma=False, deep_stem=False, stem_width=32,
             avg_down=False, final_drop=0.0, use_global_stats=False,
             name_prefix='', **kwargs):
    """ResNet50-v1b with ATAC activations in the last `act_layers` stages.

    For each of the four stages, `act_type` is used only if the stage is
    among the last `act_layers` ones; earlier stages fall back to ReLU.
    `dilated=True` keeps spatial resolution via dilation 2/4 in stages 3/4.
    """
    self.inplanes = stem_width*2 if deep_stem else 64
    super(ResNet50_v1bATAC, self).__init__(prefix=name_prefix)
    norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
    if use_global_stats:
        # freeze BN statistics
        norm_kwargs['use_global_stats'] = True
    self.norm_kwargs = norm_kwargs
    with self.name_scope():
        # NOTE(review): despite the deep_stem flag, only the classic 7x7
        # stem is built here — confirm deep_stem is unsupported on purpose.
        self.conv1 = nn.Conv2D(channels=64, kernel_size=7, strides=2,
                               padding=3, use_bias=False)
        self.bn1 = norm_layer(
            in_channels=64 if not deep_stem else stem_width*2,
            **norm_kwargs)
        self.relu = nn.Activation('relu')
        self.maxpool = nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        # stage 1: ATAC only when act_layers covers all stages
        if act_layers >= len(layers):
            tmp_act_type = act_type
        else:
            tmp_act_type = 'relu'
        self.layer1 = self._make_layer(tmp_act_type, r, 1,
                                       BottleneckV1bATAC, 64, layers[0],
                                       avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        # stage 2
        if act_layers >= len(layers) - 1:
            tmp_act_type = act_type
        else:
            tmp_act_type = 'relu'
        self.layer2 = self._make_layer(tmp_act_type, r, 2,
                                       BottleneckV1bATAC, 128, layers[1],
                                       strides=2, avg_down=avg_down,
                                       norm_layer=norm_layer,
                                       last_gamma=last_gamma)
        if dilated:
            # stages 3/4 keep stride 1 and dilate instead
            if act_layers >= len(layers) - 2:
                tmp_act_type = act_type
            else:
                tmp_act_type = 'relu'
            self.layer3 = self._make_layer(tmp_act_type, r, 3,
                                           BottleneckV1bATAC, 256,
                                           layers[2], strides=1, dilation=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            if act_layers >= len(layers) - 3:
                tmp_act_type = act_type
            else:
                tmp_act_type = 'relu'
            self.layer4 = self._make_layer(tmp_act_type, r, 4,
                                           BottleneckV1bATAC, 512,
                                           layers[3], strides=1, dilation=4,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        else:
            if act_layers >= len(layers) - 2:
                tmp_act_type = act_type
            else:
                tmp_act_type = 'relu'
            self.layer3 = self._make_layer(tmp_act_type, r, 3,
                                           BottleneckV1bATAC, 256,
                                           layers[2], strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
            if act_layers >= len(layers) - 3:
                tmp_act_type = act_type
            else:
                tmp_act_type = 'relu'
            self.layer4 = self._make_layer(tmp_act_type, r, 4,
                                           BottleneckV1bATAC, 512,
                                           layers[3], strides=2,
                                           avg_down=avg_down,
                                           norm_layer=norm_layer,
                                           last_gamma=last_gamma)
        self.avgpool = nn.GlobalAvgPool2D()
        self.flat = nn.Flatten()
        self.drop = None
        if final_drop > 0.0:
            self.drop = nn.Dropout(final_drop)
        self.fc = nn.Dense(in_units=512 * BottleneckV1bATAC.expansion,
                           units=classes)
# NOTE(review): fragment — the `def transition_block(num_channels):` header
# was lost when this chunk was extracted; the first two statements below are
# that function's body tail.
blk.add(nn.BatchNorm(), nn.Activation('relu'),
        nn.Conv2D(num_channels, kernel_size=1),
        nn.AvgPool2D(pool_size=2, strides=2))
return blk

# In[5]:

# shape check on the previous dense block's output Y
blk = transition_block(10)
blk.initialize()
blk(Y).shape

# In[6]:

net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
        nn.BatchNorm(), nn.Activation('relu'),
        nn.MaxPool2D(pool_size=3, strides=2, padding=1))

# In[7]:

num_channels, growth_rate = 64, 32  # num_channels is the current channel count
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs, growth_rate))
    # output channel count of the previous dense block
    num_channels += num_convs * growth_rate
    # between dense blocks, insert a transition layer halving the channels
    if i != len(num_convs_in_dense_blocks) - 1:
        num_channels //= 2
        net.add(transition_block(num_channels))
def __init__(self, layers, channels, classes, act_type, r, skernel, dilation,
             useReLU, useGlobal, act_layers, replace_act, act_order, asBackbone,
             norm_layer=BatchNorm, norm_kwargs=None, **kwargs):
    """Assemble a ResNet20-v2 ATAC network.

    `channels[0]` is the stem width; each entry of `layers` describes one stage
    producing `channels[i+1]` channels. Which stages receive `act_type`
    (versus `replace_act`) is governed by `act_layers` and `act_order`
    ('bac' counts from the back, 'pre' from the front).
    """
    super(ResNet20V2ATAC, self).__init__(**kwargs)
    assert len(layers) == len(channels) - 1

    def _norm_args():
        # Fresh kwargs each call (mirrors the original inline expression).
        return {} if norm_kwargs is None else norm_kwargs

    def _stage_act(idx):
        # Per-stage activation choice; raises inside the loop, as before.
        if act_order == 'bac':
            return replace_act if idx + act_layers < len(channels) else act_type
        if act_order == 'pre':
            return replace_act if idx + 1 > act_layers else act_type
        raise ValueError('Unknown act_order')

    with self.name_scope():
        self.features = nn.HybridSequential(prefix='')
        # Pre-activation stem: affine-free norm, then a 3x3 conv.
        self.features.add(norm_layer(scale=False, center=False, **_norm_args()))
        self.features.add(nn.Conv2D(channels[0], 3, 1, 1, use_bias=False))
        in_channels = channels[0]

        for idx, blocks in enumerate(layers):
            self.features.add(self._make_layer(
                layers=blocks, channels=channels[idx + 1],
                in_channels=in_channels,
                stride=(1 if idx == 0 else 2),  # first stage keeps resolution
                stage_index=idx + 1, act_type=_stage_act(idx), r=r,
                skernel=skernel, dilation=dilation, useReLU=useReLU,
                useGlobal=useGlobal, asBackbone=asBackbone,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs
            ))
            in_channels = channels[idx + 1]

        self.features.add(norm_layer(**_norm_args()))

        # Final activation before global pooling.
        if act_order == 'bac':
            final_act = replace_act if act_layers <= 0 else act_type
        elif act_order == 'pre':
            final_act = act_type if act_layers >= 4 else replace_act
        else:
            raise ValueError('Unknown act_order')

        act_factories = {
            'relu': lambda: nn.Activation('relu'),
            'prelu': nn.PReLU,
            'elu': nn.ELU,
            'selu': nn.SELU,
            'gelu': nn.GELU,
            'swish': nn.Swish,
            'ChaATAC': lambda: ChaATAC(channels=in_channels, r=r,
                                       useReLU=useReLU, useGlobal=useGlobal),
        }
        if final_act not in act_factories:
            raise ValueError("Unknown act_type in ResBlockV2ATAC")
        self.features.add(act_factories[final_act]())

        self.features.add(nn.GlobalAvgPool2D())
        self.features.add(nn.Flatten())

    self.output = nn.Dense(classes, in_units=in_channels)
def transition_block(num_channels):
    """Return a DenseNet transition block: BN -> ReLU -> 1x1 conv -> 2x2 avg-pool.

    The 1x1 convolution reduces the channel count to `num_channels`; the
    average pooling halves the spatial resolution.
    """
    stages = [
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(num_channels, kernel_size=1),
        nn.AvgPool2D(pool_size=2, strides=2),
    ]
    block = nn.Sequential()
    block.add(*stages)
    return block
def __init__(self, num_layers, base_size, sizes, ratios, steps, classes,
             stds=(0.1, 0.1, 0.2, 0.2), nms_thresh=0.3, nms_topk=10000,
             post_nms=3000, anchor_alloc_size=640, is_multitask=False,
             use_pose=False, use_keypoints=False, num_keypoints=1,
             use_embedding=False, embedding_dim=128,
             return_intermediate_features=False, **kwargs):
    """Build the SSD detector head: per-scale class/box predictors and anchor
    generators, with optional landmark, pose, keypoint and embedding branches.

    One predictor of each enabled kind is created per feature-map scale
    (`num_layers` scales), driven in lockstep with `sizes`/`ratios`/`steps`.
    """
    super(SSDDetectorHead, self).__init__(**kwargs)
    self._num_layers = num_layers
    self.classes = classes
    self.nms_thresh = nms_thresh
    self.nms_topk = nms_topk
    self.post_nms = post_nms
    self._use_pose = use_pose
    # Pose prediction is only built under the multitask branch, so enabling
    # pose force-enables multitask.
    if self._use_pose:
        self._is_multitask = True
    else:
        self._is_multitask = is_multitask
    self._use_keypoints = use_keypoints
    self._keypoint_size = num_keypoints * 2  # (x, y) pair per keypoint
    # NOTE(review): attribute name is misspelled ("emebdding"); kept as-is
    # because sibling methods of this class likely reference it — rename
    # across the whole class in a follow-up change.
    self._use_emebdding = use_embedding
    self._embedding_dim = embedding_dim
    self._return_int_feat = return_intermediate_features
    with self.name_scope():
        self.class_predictors = nn.HybridSequential()
        self.box_predictors = nn.HybridSequential()
        self.anchor_generators = nn.HybridSequential()
        if self._is_multitask:
            self.landmark_predictors = nn.HybridSequential()
        if self._use_pose:
            self.pose_predictors = nn.HybridSequential()
        if self._use_keypoints:
            self.keypoint_predictors = nn.HybridSequential()
        if self._use_emebdding:
            self.embedding_predictors = nn.HybridSequential()
        asz = anchor_alloc_size
        im_size = (base_size, base_size)
        # One pass per pyramid scale; the anchor allocation map shrinks 2x per
        # scale but never below 16x16.
        for i, s, r, st in zip(range(num_layers), sizes, ratios, steps):
            anchor_generator = SSDAnchorGenerator(i, im_size, s, r, st, (asz, asz))
            self.anchor_generators.add(anchor_generator)
            asz = max(asz // 2, 16)  # pre-compute larger than 16x16 anchor map
            num_anchors = anchor_generator.num_depth
            # +1 output per class for the background slot.
            self.class_predictors.add(
                ConvPredictor(num_anchors * (len(self.classes) + 1)))
            self.box_predictors.add(ConvPredictor(num_anchors * 4))
            if self._is_multitask:
                # 10 = 5 facial landmarks x (x, y) — presumably; confirm with
                # the landmark decoder.
                self.landmark_predictors.add(
                    ConvPredictor(num_anchors * 10))
            if self._use_pose:
                self.pose_predictors.add(ConvPredictor(num_anchors * 6))
            if self._use_keypoints:
                self.keypoint_predictors.add(
                    ConvPredictor(num_anchors *
                                  self._keypoint_size))
            if self._use_emebdding:
                # Embedding branch: predictor -> BN -> LeakyReLU -> grouped 1x1
                # conv, one group per (anchor, class) pair.
                local_seq = nn.HybridSequential()
                local_seq.add(
                    ConvPredictor(num_anchors * self._embedding_dim * len(self.classes)))
                local_seq.add(
                    nn.BatchNorm(prefix='embedding_norm_{}_'.format(i)))
                local_seq.add(nn.LeakyReLU(alpha=0.25))
                local_seq.add(
                    nn.Conv2D(
                        num_anchors * self._embedding_dim * len(self.classes),
                        (1, 1),
                        weight_initializer=mx.init.Xavier(magnitude=2),
                        bias_initializer='zeros',
                        groups=num_anchors * len(self.classes)))
                self.embedding_predictors.add(local_seq)
        # Decoders convert raw network outputs back to image-space values.
        self.bbox_decoder = NormalizedBoxCenterDecoder(stds)
        self.cls_decoder = MultiPerClassDecoder(len(self.classes) + 1, thresh=0.01)
        if self._is_multitask:
            self.landmark_decoder = NormalizedLandmarkCenterDecoder(stds)
        if self._use_keypoints:
            self.keypoint_decoder = GeneralNormalizedKeyPointsDecoder(1)