def function_set(self):
    # First convolutional layer
    # Convolution
    h1_conv = nd.Convolution(
        data=self.__batch_X, weight=self.__W1, bias=self.__b1,
        kernel=self.__W1.shape[2:], num_filter=self.__W1.shape[0])
    # Activation
    h1_activation = nd.relu(h1_conv)
    # Pooling
    h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(
        data=h1, weight=self.__W2, bias=self.__b2,
        kernel=self.__W2.shape[2:], num_filter=self.__W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, self.__W3) + self.__b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, self.__W4) + self.__b4
    # print("1st conv block:", h1.shape)
    # print("2nd conv block:", h2.shape)
    # print("1st dense:", h3.shape)
    # print("2nd dense:", h4_linear.shape)
    # print("output:", h4_linear)
    return h4_linear
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1,
                             kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type='max', kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2,
                             kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block', h1.shape)
        print('2nd conv block', h2.shape)
        print('1st dense', h3.shape)
        print('2nd dense', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
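# A minimal sketch of how the global parameters consumed by net() above could
# be initialized for 28x28 single-channel input (e.g. MNIST). The shapes and
# the init scale are assumptions, not taken from the original source.
import mxnet as mx
from mxnet import nd

ctx = mx.cpu()
weight_scale = 0.01
W1 = nd.random.normal(shape=(20, 1, 5, 5), scale=weight_scale, ctx=ctx)   # conv1: 20 filters, 5x5
b1 = nd.zeros(W1.shape[0], ctx=ctx)
W2 = nd.random.normal(shape=(50, 20, 3, 3), scale=weight_scale, ctx=ctx)  # conv2: 50 filters, 3x3
b2 = nd.zeros(W2.shape[0], ctx=ctx)
# 28x28 -> conv 5x5 -> 24x24 -> pool -> 12x12 -> conv 3x3 -> 10x10 -> pool -> 5x5,
# so the flattened dimension is 50 * 5 * 5 = 1250
W3 = nd.random.normal(shape=(1250, 128), scale=weight_scale, ctx=ctx)     # dense1
b3 = nd.zeros(W3.shape[1], ctx=ctx)
W4 = nd.random.normal(shape=(128, 10), scale=weight_scale, ctx=ctx)       # dense2: 10 classes
b4 = nd.zeros(W4.shape[1], ctx=ctx)
for param in [W1, b1, W2, b2, W3, b3, W4, b4]:
    param.attach_grad()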
def function_set(self):
    def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance,
                   eps=1e-5, moving_momentum=0.9):
        assert len(X.shape) in (2, 4)
        # Fully connected: batch_size x feature
        if len(X.shape) == 2:
            # Mean and variance of each input dimension over the batch
            mean = X.mean(axis=0)
            variance = ((X - mean) ** 2).mean(axis=0)
        # 2D convolution: batch_size x channel x height x width
        else:
            # Compute mean and variance per channel; keep the 4D shape
            # so that broadcasting works correctly
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            variance = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
            # Reshape so that broadcasting works correctly
            moving_mean = moving_mean.reshape(mean.shape)
            moving_variance = moving_variance.reshape(mean.shape)
        # Normalize
        if is_training:
            X_hat = (X - mean) / nd.sqrt(variance + eps)
            # !!! Update the global mean and variance:
            # each batch keeps 0.9 of the running statistics and adds 0.1 of its own
            moving_mean[:] = moving_momentum * moving_mean + (1.0 - moving_momentum) * mean
            moving_variance[:] = moving_momentum * moving_variance + (1.0 - moving_momentum) * variance
        else:
            # !!! At test time, use the global mean and variance
            X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)
        # Scale and shift
        return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)

    # First convolutional layer
    h1_conv = nd.Convolution(
        data=self.__batch_X, weight=self.__W1, bias=self.__b1,
        kernel=(5, 5), num_filter=20)
    # First BN
    h1_bn = batch_norm(
        h1_conv, self.__gamma1, self.__beta1, self.__is_training,
        self.__moving_mean1, self.__moving_variance1)
    h1_activation = nd.relu(h1_bn)
    h1 = nd.Pooling(
        data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(
        data=h1, weight=self.__W2, bias=self.__b2,
        kernel=(3, 3), num_filter=50)
    # Second BN
    h2_bn = batch_norm(
        h2_conv, self.__gamma2, self.__beta2, self.__is_training,
        self.__moving_mean2, self.__moving_variance2)
    h2_activation = nd.relu(h2_bn)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, self.__W3) + self.__b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, self.__W4) + self.__b4
    return h4_linear
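# A minimal sketch (an assumption, not from the original source) of how the
# batch-norm state used above could be initialized: gamma/beta are learned
# parameters, while the moving statistics are updated in place by
# batch_norm() and therefore need no gradient. Sizes match the 20- and
# 50-filter conv layers above.
from mxnet import nd

gamma1 = nd.random.normal(shape=(20,), scale=0.01)  # one scale per conv1 channel
beta1 = nd.random.normal(shape=(20,), scale=0.01)
moving_mean1 = nd.zeros((20,))
moving_variance1 = nd.zeros((20,))
gamma2 = nd.random.normal(shape=(50,), scale=0.01)  # one scale per conv2 channel
beta2 = nd.random.normal(shape=(50,), scale=0.01)
moving_mean2 = nd.zeros((50,))
moving_variance2 = nd.zeros((50,))
for param in [gamma1, beta1, gamma2, beta2]:
    param.attach_grad()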
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1,
                             kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # h1_conv.shape: (256, 20, 24, 24)
    # h1.shape: (256, 20, 12, 12)
    # print('h1_conv.shape: ', h1_conv.shape)
    # print('h1.shape: ', h1.shape)
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2,
                             kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense block:', h3.shape)
        print('2nd dense block:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
def network(self, X=None, debug=False):
    filters, kernels, stride, padding, dilate = (
        self.conv_params['num_filter'], self.conv_params['kernel'],
        self.conv_params['stride'], self.conv_params['padding'],
        self.conv_params['dilate'])
    type_pool, kernels_pool, stride_pool, padding_pool, dilate_pool = (
        self.pool_params['pool_type'], self.pool_params['kernel'],
        self.pool_params['stride'], self.pool_params['padding'],
        self.pool_params['dilate'])
    act_type = self.act_params['act_type']
    hidden_dim = self.fc_params['hidden_dim']

    # CNN ################################################################
    convlayer_out = X
    interlayer = []
    for i, (nf, k, S, P, D, t_p, k_p, S_p, P_p, D_p, a) in enumerate(
            zip(filters, kernels, stride, padding, dilate,
                type_pool, kernels_pool, stride_pool, padding_pool,
                dilate_pool, act_type)):
        W, b = self.params['W{:d}'.format(i + 1)], self.params['b{:d}'.format(i + 1)]
        convlayer_out = nd.Convolution(data=convlayer_out, weight=W, bias=b,
                                       kernel=k, num_filter=nf, stride=S,
                                       pad=P, dilate=D)
        convlayer_out = activation(convlayer_out, act_type=a)
        convlayer_out = nd.Pooling(data=convlayer_out, pool_type=t_p,
                                   kernel=k_p, stride=S_p, pad=P_p)
        interlayer.append(convlayer_out)
        i_out = i
        if debug:
            print("layer{:d} shape: {}".format(i + 1, convlayer_out.shape))

    # MLP ################################################################
    FClayer_out = nd.flatten(convlayer_out)
    interlayer.append(FClayer_out)
    if debug:
        print("After Flattened, Data shape: {}".format(FClayer_out.shape))
    for j, (hd, a) in enumerate(zip(hidden_dim, act_type[-len(hidden_dim):])):
        W, b = self.params['W{:d}'.format(j + i_out + 2)], self.params['b{:d}'.format(j + i_out + 2)]
        FClayer_out = nd.dot(FClayer_out, W) + b
        FClayer_out = activation(FClayer_out, act_type=a)
        if autograd.is_training():
            # Apply dropout to the activation output
            FClayer_out = dropout(FClayer_out, self.drop_prob)
        if debug:
            print("layer{:d} shape: {}".format(j + i_out + 2, FClayer_out.shape))
        interlayer.append(FClayer_out)
        j_out = j

    # OUTPUT #############################################################
    W, b = self.params['W{:d}'.format(j_out + i_out + 3)], self.params['b{:d}'.format(j_out + i_out + 3)]
    yhat = nd.dot(FClayer_out, W) + b
    if debug:
        print("Output shape: {}".format(yhat.shape))
        print('------------')
    interlayer.append(yhat)
    return yhat, interlayer
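# A hypothetical configuration for network() above, to illustrate the expected
# structure of the parameter dicts (these names and values are assumptions,
# not taken from the original source): each conv/pool entry is a per-layer
# list, and act_type covers the conv layers followed by the hidden FC layers.
conv_params = {'num_filter': [20, 50],
               'kernel': [(5, 5), (3, 3)],
               'stride': [(1, 1), (1, 1)],
               'padding': [(0, 0), (0, 0)],
               'dilate': [(1, 1), (1, 1)]}
pool_params = {'pool_type': ['max', 'max'],
               'kernel': [(2, 2), (2, 2)],
               'stride': [(2, 2), (2, 2)],
               'padding': [(0, 0), (0, 0)],
               'dilate': [(1, 1), (1, 1)]}
act_params = {'act_type': ['relu', 'relu', 'relu']}  # 2 conv layers + 1 hidden FC layer
fc_params = {'hidden_dim': [128]}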
def forward(self, data, valid_length):
    masked_encoded = F.SequenceMask(data, sequence_length=valid_length,
                                    use_sequence_length=True)
    subsampled = F.Pooling(masked_encoded.swapaxes(0, 2), kernel=self.size,
                           pool_type='max', stride=self.size).swapaxes(0, 2)
    sub_valid_length = mx.nd.ceil(valid_length / self.size)
    return subsampled, sub_valid_length
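# A standalone sketch of the idea behind forward() above: zero out positions
# past each sequence's valid length, max-pool along the time axis with
# stride k, and shrink the valid lengths by ceil(len / k). The shapes here
# are assumptions for illustration (time=6, batch=2, channel=1, factor k=2).
from mxnet import nd

k = 2
x = nd.arange(12).reshape((6, 2, 1))      # (time, batch, channel)
valid_length = nd.array([6, 3])           # true length per batch item
masked = nd.SequenceMask(x, sequence_length=valid_length,
                         use_sequence_length=True)          # masks along axis 0
pooled = nd.Pooling(masked.swapaxes(0, 2), kernel=(k,),     # pool over time
                    pool_type='max', stride=(k,)).swapaxes(0, 2)
print(pooled.shape)               # (3, 2, 1): time dimension halved
print(nd.ceil(valid_length / k))  # [3. 2.]: the new valid lengths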
def net(x, is_training=False, verbose=False):
    x = x.as_in_context(w1.context)
    h1_conv = nd.Convolution(data=x, weight=w1, bias=b1,
                             kernel=w1.shape[2:], num_filter=c1)
    h1_bn = utils.batch_norm(h1_conv, gamma1, beta1, is_training,
                             moving_mean1, moving_variance1)
    h1_activation = nd.relu(h1_bn)
    h1 = nd.Pooling(data=h1_activation, pool_type='max', kernel=(2, 2), stride=(2, 2))
    h2_conv = nd.Convolution(data=h1, weight=w2, bias=b2,
                             kernel=w2.shape[2:], num_filter=c2)
    h2_bn = utils.batch_norm(h2_conv, gamma2, beta2, is_training,
                             moving_mean2, moving_variance2)
    h2_activation = nd.relu(h2_bn)
    h2 = nd.Pooling(data=h2_activation, pool_type='max', kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    h3_linear = nd.dot(h2, w3) + b3
    h3 = nd.relu(h3_linear)
    h4_linear = nd.dot(h3, w4) + b4
    if verbose:
        print('h1 conv block: ', h1.shape)
        print('h2 conv block: ', h2.shape)
        print('h3 dense: ', h3.shape)
        print('h4 dense: ', h4_linear.shape)
        print('output: ', h4_linear)
    return h4_linear.as_in_context(ctx)
def network(X, drop_rate=0.0):
    # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
    # data size
    # MNIST, FashionMNIST = (batch size, 1, 28, 28)
    # CIFAR = (batch size, 3, 32, 32)
    C_H1 = nd.Activation(data=nd.Convolution(data=X, weight=W1, bias=B1,
                                             kernel=(3, 3), stride=(1, 1),
                                             num_filter=60),
                         act_type="relu")
    # MNIST: result = (batch size, 60, 26, 26), CIFAR10: result = (batch size, 60, 30, 30)
    P_H1 = nd.Pooling(data=C_H1, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # MNIST: result = (batch size, 60, 13, 13), CIFAR10: result = (batch size, 60, 15, 15)
    C_H2 = nd.Activation(data=nd.Convolution(data=P_H1, weight=W2, bias=B2,
                                             kernel=(6, 6), stride=(1, 1),
                                             num_filter=30),
                         act_type="relu")
    # MNIST: result = (batch size, 30, 8, 8), CIFAR10: result = (batch size, 30, 10, 10)
    P_H2 = nd.Pooling(data=C_H2, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # MNIST: result = (batch size, 30, 4, 4), CIFAR10: result = (batch size, 30, 5, 5)
    P_H2 = nd.flatten(data=P_H2)
    '''FullyConnected parameters
    - data: (batch_size, input_dim)
    - weight: (num_hidden, input_dim)
    - bias: (num_hidden,)
    - out: (batch_size, num_hidden)
    '''
    F_H1 = nd.Activation(nd.FullyConnected(data=P_H2, weight=W3, bias=B3,
                                           num_hidden=120), act_type="sigmoid")
    F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
    F_H2 = nd.Activation(nd.FullyConnected(data=F_H1, weight=W4, bias=B4,
                                           num_hidden=64), act_type="sigmoid")
    F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
    softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5,
                                             num_hidden=10))
    return softmax_Y
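# A quick check of the output-size formula in network() above on the MNIST
# path. The random weights here are assumptions for illustration; only the
# shapes matter.
from mxnet import nd

X = nd.random.normal(shape=(1, 1, 28, 28))
W = nd.random.normal(shape=(60, 1, 3, 3))
B = nd.zeros(60)
out = nd.Convolution(data=X, weight=W, bias=B, kernel=(3, 3),
                     stride=(1, 1), num_filter=60)
print(out.shape)  # (1, 60, 26, 26): (28 - 3 + 2*0) / 1 + 1 = 26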
def forward(self, X):
    h = F.Activation(self.conv1_1(X), act_type='relu')
    h = F.Activation(self.conv1_2(h), act_type='relu')
    relu1_2 = h
    h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
    h = F.Activation(self.conv2_1(h), act_type='relu')
    h = F.Activation(self.conv2_2(h), act_type='relu')
    relu2_2 = h
    h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
    h = F.Activation(self.conv3_1(h), act_type='relu')
    h = F.Activation(self.conv3_2(h), act_type='relu')
    h = F.Activation(self.conv3_3(h), act_type='relu')
    relu3_3 = h
    h = F.Pooling(h, pool_type='max', kernel=(2, 2), stride=(2, 2))
    h = F.Activation(self.conv4_1(h), act_type='relu')
    h = F.Activation(self.conv4_2(h), act_type='relu')
    h = F.Activation(self.conv4_3(h), act_type='relu')
    relu4_3 = h
    return [relu1_2, relu2_2, relu3_3, relu4_3]
def net_lenet(X, verbose=False):
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=lenet_W1, bias=lenet_b1,
                             kernel=lenet_W1.shape[2:], num_filter=lenet_W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=lenet_W2, bias=lenet_b2,
                             kernel=lenet_W2.shape[2:], num_filter=lenet_W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, lenet_W3) + lenet_b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, lenet_W4) + lenet_b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
def forward(self, data):
    for i, c in enumerate(self._convolutions):
        data = nd.concat(data, data, dim=1)
        data = c(data)
        data = Tanh()(data)
        data = nd.Pooling(data=data, pool_type='max', kernel=(2, 2),
                          stride=(2, 2), pad=(0, 0))
    data = self._linear(data)
    data = Tanh()(data)
    data = self._classifier(data)
    return data
def forward(self, data):
    for i, c in enumerate(self._convolutions):
        data = c(data)
        data = Tanh()(data)
        data = nd.Pooling(data=data, pool_type='max', kernel=(2, 2),
                          stride=(2, 2), pad=(0, 0), cudnn_off=True)
    # data = nd.max(data, axis=1)
    data = self._linear(data)
    data = Tanh()(data)
    data = self._classifier(data)
    return data
def forward(self, X, stride=1):
    filters = []
    for i in range(self._n_scales):
        kernel = (i * 2 + 1,) * 2
        pad = (i,) * 2
        f = nd.Pooling(data=X, pool_type='max', kernel=kernel,
                       stride=(stride, stride), pad=pad, cudnn_off=True)
        f = nd.reshape(f, (f.shape[0], 1) + f.shape[1:])
        filters.append(f)
    filters = nd.concat(*filters, dim=1)
    weight = nd.softmax(self._get_param(self.weight), axis=1)
    filters = nd.mean(filters, axis=1)
    # filters = nd.sum(filters * weight, axis=1)
    return filters
def forward(self, x):
    inp = x.shape[1]
    x = nd.Pooling(x, global_pool=True)
    x = nd.flatten(x)
    x = nd.dot(x, self.weight.data()[:inp, :]) + self.bias.data()
    return x
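# A small demonstration of the global pooling used in forward() above: with
# global_pool=True, nd.Pooling collapses the spatial dimensions to 1x1
# (the default pool_type is 'max'). The input shape here is an arbitrary
# example, not taken from the original source.
from mxnet import nd

x = nd.random.normal(shape=(2, 8, 4, 4))
pooled = nd.Pooling(x, global_pool=True)
print(pooled.shape)              # (2, 8, 1, 1)
print(nd.flatten(pooled).shape)  # (2, 8): ready for the sliced dense layer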
import mxnet as mx
def net_PLB(X, params, debug=False, pool_type='max', pool_size=4, pool_stride=4):
    [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6, W7, b7] = params
    ########################
    # Define the computation of the first convolutional layer
    ########################
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=(1, 16),
                             num_filter=64, stride=(1, 1), dilate=(1, 1))
    h1_pooling = nd.Pooling(data=h1_conv, pool_type=pool_type,
                            kernel=(1, pool_size), stride=(1, pool_stride))
    h1 = relu(h1_pooling)
    if debug:
        print("h1 shape: %s" % (np.array(h1.shape)))
    ########################
    # Define the computation of the second convolutional layer
    ########################
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=(1, 16),
                             num_filter=128, stride=(1, 1), dilate=(1, 2))
    h2_pooling = nd.Pooling(data=h2_conv, pool_type=pool_type,
                            kernel=(1, pool_size), stride=(1, pool_stride))
    h2 = relu(h2_pooling)
    if debug:
        print("h2 shape: %s" % (np.array(h2.shape)))
    ########################
    # Define the computation of the third convolutional layer
    ########################
    h3_conv = nd.Convolution(data=h2, weight=W3, bias=b3, kernel=(1, 16),
                             num_filter=256, stride=(1, 1), dilate=(1, 2))
    h3_pooling = nd.Pooling(data=h3_conv, pool_type=pool_type,
                            kernel=(1, pool_size), stride=(1, pool_stride))
    h3 = relu(h3_pooling)
    if debug:
        print("h3 shape: %s" % (np.array(h3.shape)))
    ########################
    # Define the computation of the 4th convolutional layer
    ########################
    h4_conv = nd.Convolution(data=h3, weight=W4, bias=b4, kernel=(1, 32),
                             num_filter=512, stride=(1, 1), dilate=(1, 2))
    h4_pooling = nd.Pooling(data=h4_conv, pool_type=pool_type,
                            kernel=(1, pool_size), stride=(1, pool_stride))
    h4 = relu(h4_pooling)
    if debug:
        print("h4 shape: %s" % (np.array(h4.shape)))
    ########################
    # Flattening h4 so that we can feed it into a fully-connected layer
    ########################
    h5 = nd.flatten(h4)
    if debug:
        print("Flat h5 shape: %s" % (np.array(h5.shape)))
    ########################
    # Define the computation of the 5th (fully-connected) layer
    ########################
    h6_linear = nd.dot(h5, W5) + b5
    h6 = relu(h6_linear)
    if debug:
        print("h6 shape: %s" % (np.array(h6.shape)))
    ########################
    # Define the computation of the 6th (fully-connected) layer
    ########################
    h7_linear = nd.dot(h6, W6) + b6
    h7 = relu(h7_linear)
    if debug:
        print("h7 shape: %s" % (np.array(h7.shape)))
    ########################
    # Define the computation of the output layer
    ########################
    yhat_linear = nd.dot(h7, W7) + b7
    if debug:
        print("yhat_linear shape: %s" % (np.array(yhat_linear.shape)))
    interlayer = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6, W7, b7]
    return yhat_linear, interlayer
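# The snippet below starts mid-example; a plausible setup (an assumption,
# reconstructed from the prints that follow and from the multi-channel
# example later in the snippet) is a single-channel convolution with a 2x2
# kernel on a 3x3 input:
from mxnet import nd

w = nd.arange(4).reshape((1, 1, 2, 2))   # (num_filter, channel, height, width)
b = nd.array([1])
data = nd.arange(9).reshape((1, 1, 3, 3))
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0])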
print('weight:', w)
print('bias:', b)
print('output:', out)

# Stride and padding
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0],
                     stride=(2, 2), pad=(1, 1))
print('output:', out)

# Multi-channel convolution: each channel has its own weights; convolve each
# channel separately, then sum the results over the channels
data = nd.arange(18).reshape((1, 2, 3, 3))
w = nd.arange(8).reshape((1, 2, 2, 2))
out = nd.Convolution(data, w, b, kernel=w.shape[2:], num_filter=w.shape[0])
print('weight = ', w)
print('data = ', data)
print('output = ', out)

# Pooling
data = nd.arange(18).reshape((1, 2, 3, 3))
max_pool = nd.Pooling(data=data, pool_type="max", kernel=(2, 2))
avg_pool = nd.Pooling(data=data, pool_type="avg", kernel=(2, 2))
print('data = ', data)
print('max pool = ', max_pool)
print('avg pool = ', avg_pool)
def net_PRL(X, params, debug=False, pool_type='max', pool_size=4, pool_stride=2):
    [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5,
     W6, b6, W7, b7, W8, b8, W9, b9] = params
    drop_prob = 0.5
    ########################
    # Define the computation of the first convolutional layer
    ########################
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=(1, 64),
                             num_filter=8, stride=(1, 1), dilate=(1, 1))
    h1 = nd.LeakyReLU(h1_conv, act_type='elu')
    if debug:
        print("h1 shape: %s" % (np.array(h1.shape)))
    ########################
    # Define the computation of the second convolutional layer
    ########################
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=(1, 32),
                             num_filter=8, stride=(1, 1), dilate=(1, 1))
    h2_pooling = nd.Pooling(data=h2_conv, pool_type=pool_type,
                            kernel=(1, 8), stride=(1, pool_stride))
    h2 = nd.LeakyReLU(h2_pooling, act_type='elu')
    if debug:
        print("h2 shape: %s" % (np.array(h2.shape)))
    ########################
    # Define the computation of the third convolutional layer
    ########################
    h3_conv = nd.Convolution(data=h2, weight=W3, bias=b3, kernel=(1, 32),
                             num_filter=16, stride=(1, 1), dilate=(1, 1))
    h3 = nd.LeakyReLU(h3_conv, act_type='elu')
    if debug:
        print("h3 shape: %s" % (np.array(h3.shape)))
    ########################
    # Define the computation of the 4th convolutional layer
    ########################
    h4_conv = nd.Convolution(data=h3, weight=W4, bias=b4, kernel=(1, 16),
                             num_filter=16, stride=(1, 1), dilate=(1, 1))
    h4_pooling = nd.Pooling(data=h4_conv, pool_type=pool_type,
                            kernel=(1, 6), stride=(1, pool_stride))
    h4 = nd.LeakyReLU(h4_pooling, act_type='elu')
    if debug:
        print("h4 shape: %s" % (np.array(h4.shape)))
    ########################
    # Define the computation of the 5th convolutional layer
    ########################
    h5_conv = nd.Convolution(data=h4, weight=W5, bias=b5, kernel=(1, 16),
                             num_filter=32, stride=(1, 1), dilate=(1, 1))
    h5 = nd.LeakyReLU(h5_conv, act_type='elu')
    if debug:
        print("h5 shape: %s" % (np.array(h5.shape)))
    ########################
    # Define the computation of the 6th convolutional layer
    ########################
    h6_conv = nd.Convolution(data=h5, weight=W6, bias=b6, kernel=(1, 16),
                             num_filter=32, stride=(1, 1), dilate=(1, 1))
    h6_pooling = nd.Pooling(data=h6_conv, pool_type=pool_type,
                            kernel=(1, 4), stride=(1, pool_stride))
    h6 = nd.LeakyReLU(h6_pooling, act_type='elu')
    if debug:
        print("h6 shape: %s" % (np.array(h6.shape)))
    ########################
    # Flattening h6 so that we can feed it into a fully-connected layer
    ########################
    h7 = nd.flatten(h6)
    if debug:
        print("Flat h7 shape: %s" % (np.array(h7.shape)))
    ########################
    # Define the computation of the 8th (fully-connected) layer
    ########################
    h8_linear = nd.dot(h7, W7) + b7
    h8 = nd.LeakyReLU(h8_linear, act_type='elu')
    if autograd.is_training():
        # Apply dropout to the activation output
        h8 = dropout(h8, drop_prob)
    if debug:
        print("h8 shape: %s" % (np.array(h8.shape)))
    ########################
    # Define the computation of the 9th (fully-connected) layer
    ########################
    h9_linear = nd.dot(h8, W8) + b8
    h9 = nd.LeakyReLU(h9_linear, act_type='elu')
    if autograd.is_training():
        # Apply dropout to the activation output
        h9 = dropout(h9, drop_prob)
    if debug:
        print("h9 shape: %s" % (np.array(h9.shape)))
    ########################
    # Define the computation of the output layer
    ########################
    yhat_linear = nd.dot(h9, W9) + b9
    if debug:
        print("yhat_linear shape: %s" % (np.array(yhat_linear.shape)))
    interlayer = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5,
                  W6, b6, W7, b7, W8, b8, W9, b9]
    return yhat_linear, interlayer
def network(X, drop_rate=0.0):
    # formula: output_size = ((input - kernel + 2*padding) / stride) + 1
    # data size
    # MNIST, FashionMNIST = (batch size, 1, 28, 28)
    # CIFAR = (batch size, 3, 32, 32)
    # Note: the built-in BatchNorm's moving_mean / moving_var updates do not
    # work here, hence use_global_stats=True.
    C_H1 = nd.Activation(data=nd.BatchNorm(data=nd.Convolution(data=X, weight=W1, bias=B1,
                                                               kernel=(3, 3), stride=(1, 1),
                                                               num_filter=60),
                                           gamma=gamma1, beta=beta1,
                                           moving_mean=ma1, moving_var=mv1,
                                           momentum=0.9, fix_gamma=False,
                                           use_global_stats=True),
                         act_type="relu")
    # MNIST: result = (batch size, 60, 26, 26), CIFAR10: result = (batch size, 60, 30, 30)
    P_H1 = nd.Pooling(data=C_H1, pool_type="avg", kernel=(2, 2), stride=(2, 2))
    # MNIST: result = (batch size, 60, 13, 13), CIFAR10: result = (batch size, 60, 15, 15)
    C_H2 = nd.Activation(data=nd.BatchNorm(data=nd.Convolution(data=P_H1, weight=W2, bias=B2,
                                                               kernel=(6, 6), stride=(1, 1),
                                                               num_filter=30),
                                           gamma=gamma2, beta=beta2,
                                           moving_mean=ma2, moving_var=mv2,
                                           momentum=0.9, fix_gamma=False,
                                           use_global_stats=True),
                         act_type="relu")
    # MNIST: result = (batch size, 30, 8, 8), CIFAR10: result = (batch size, 30, 10, 10)
    P_H2 = nd.Pooling(data=C_H2, pool_type="avg", kernel=(2, 2), stride=(2, 2))
    # MNIST: result = (batch size, 30, 4, 4), CIFAR10: result = (batch size, 30, 5, 5)
    P_H2 = nd.flatten(data=P_H2)
    '''FullyConnected parameters
    - data: (batch_size, input_dim)
    - weight: (num_hidden, input_dim)
    - bias: (num_hidden,)
    - out: (batch_size, num_hidden)
    '''
    F_H1 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(data=P_H2, weight=W3, bias=B3,
                                                             num_hidden=120),
                                      gamma=gamma3, beta=beta3,
                                      moving_mean=ma3, moving_var=mv3,
                                      momentum=0.9, fix_gamma=False,
                                      use_global_stats=True),
                         act_type="relu")
    F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
    F_H2 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(data=F_H1, weight=W4, bias=B4,
                                                             num_hidden=64),
                                      gamma=gamma4, beta=beta4,
                                      moving_mean=ma4, moving_var=mv4,
                                      momentum=0.9, fix_gamma=False,
                                      use_global_stats=True),
                         act_type="relu")
    F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
    # softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
    out = nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10)
    return out
def net(X, params, debug=False, pool_type='avg', pool_size=16, pool_stride=2,
        act_type='relu', dilate_size=1, nf=1):
    [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5] = params
    ########################
    # Define the computation of the first convolutional layer
    ########################
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=(1, 16),
                             num_filter=int(16 * nf), stride=(1, 1),
                             dilate=(1, dilate_size))
    h1_activation = activation(h1_conv, act_type=act_type)
    h1 = nd.Pooling(data=h1_activation, pool_type=pool_type,
                    kernel=(1, pool_size), stride=(1, pool_stride))
    if debug:
        print("h1 shape: %s" % (np.array(h1.shape)))
    ########################
    # Define the computation of the second convolutional layer
    ########################
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=(1, 8),
                             num_filter=int(32 * nf), stride=(1, 1),
                             dilate=(1, dilate_size))
    h2_activation = activation(h2_conv, act_type=act_type)
    h2 = nd.Pooling(data=h2_activation, pool_type=pool_type,
                    kernel=(1, pool_size), stride=(1, pool_stride))
    if debug:
        print("h2 shape: %s" % (np.array(h2.shape)))
    ########################
    # Define the computation of the third convolutional layer
    ########################
    h3_conv = nd.Convolution(data=h2, weight=W3, bias=b3, kernel=(1, 8),
                             num_filter=int(64 * nf), stride=(1, 1),
                             dilate=(1, dilate_size))
    h3_activation = activation(h3_conv, act_type=act_type)
    h3 = nd.Pooling(data=h3_activation, pool_type=pool_type,
                    kernel=(1, pool_size), stride=(1, pool_stride))
    if debug:
        print("h3 shape: %s" % (np.array(h3.shape)))
    ########################
    # Flattening h3 so that we can feed it into a fully-connected layer
    ########################
    h4 = nd.flatten(h3)
    if debug:
        print("Flat h4 shape: %s" % (np.array(h4.shape)))
    ########################
    # Define the computation of the 4th (fully-connected) layer
    ########################
    h5_linear = nd.dot(h4, W4) + b4
    h5 = activation(h5_linear, act_type=act_type)
    if autograd.is_training():
        # Apply dropout to the activation output
        h5 = dropout(h5, drop_prob)
    if debug:
        print("h5 shape: %s" % (np.array(h5.shape)))
        print("Dropout: ", drop_prob)
    ########################
    # Define the computation of the output layer
    ########################
    yhat_linear = nd.dot(h5, W5) + b5
    if debug:
        print("yhat_linear shape: %s" % (np.array(yhat_linear.shape)))
    interlayer = [h1, h2, h3, h4, h5]
    return yhat_linear, interlayer
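# A sketch (assumptions throughout) of how the params list consumed by net()
# above could be built for nf=1 and single-channel input. The conv weight
# shapes follow from the kernel/num_filter values in net(); flat_dim and the
# dense sizes are hypothetical placeholders that depend on the input signal
# length, pool_size, and pool_stride.
from mxnet import nd

flat_dim = 64 * 100   # hypothetical: 64 channels x remaining time steps
scale = 0.01
W1 = nd.random.normal(shape=(16, 1, 1, 16), scale=scale)
b1 = nd.zeros(16)
W2 = nd.random.normal(shape=(32, 16, 1, 8), scale=scale)
b2 = nd.zeros(32)
W3 = nd.random.normal(shape=(64, 32, 1, 8), scale=scale)
b3 = nd.zeros(64)
W4 = nd.random.normal(shape=(flat_dim, 128), scale=scale)
b4 = nd.zeros(128)
W5 = nd.random.normal(shape=(128, 2), scale=scale)  # hypothetical 2-class output
b5 = nd.zeros(2)
params = [W1, b1, W2, b2, W3, b3, W4, b4, W5, b5]
for p in params:
    p.attach_grad()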