import numpy

# NOTE: the import path below is an assumption; LeNetConvPoolLayer,
# HiddenLayer and OutputLayer are defined elsewhere in this repo, so adjust
# the module name to match the actual file layout.
from layers import LeNetConvPoolLayer, HiddenLayer, OutputLayer
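
# A quick sanity check for the shape comments in cnn.__init__ below (this
# helper is an addition for illustration, not part of the original code):
# a 5*5 "valid" convolution followed by non-overlapping 2*2 pooling maps
# 28 -> 24 -> 12 and then 12 -> 8 -> 4, so the flattened hidden-layer input
# has nkerns[1] * 4 * 4 units.
def conv_pool_output_size(size, filter_size=5, pool=2):
    return (size - filter_size + 1) // pool

assert conv_pool_output_size(28) == 12
assert conv_pool_output_size(12) == 4
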
class cnn(object):
    def __init__(self, rng, nkerns, batch_size):
        # Layer 0 is a convolution layer plus a pooling layer: nkerns[0] (20)
        # feature maps, 5*5 convolution, 2*2 pooling size.
        # Filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24);
        # max-pooling reduces this further to (24/2, 24/2) = (12, 12),
        # so the 4D output tensor has shape (batch_size, nkerns[0], 12, 12).
        self.layer0 = LeNetConvPoolLayer(
            rng,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
            poolsize=(2, 2)
        )

        # Layer 1 is also a convolution + pooling layer: nkerns[1] (50)
        # feature maps, 5*5 convolution, 2*2 pooling size.
        self.layer1 = LeNetConvPoolLayer(
            rng,
            image_shape=(batch_size, nkerns[0], 12, 12),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(2, 2)
        )

        # The HiddenLayer is fully connected, so it operates on 2D matrices of
        # shape (batch_size, num_pixels), i.e. matrices of rasterized images.
        # Flattening layer1's output gives a matrix of shape
        # (batch_size, nkerns[1] * 4 * 4), or (500, 50 * 4 * 4) = (500, 800)
        # with the default values.
        # Layer 2 is the fully-connected hidden layer with tanh activation;
        # its input size is nkerns[1] * 4 * 4.
        self.layer2 = HiddenLayer(
            rng,
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activation_type="tanh"
        )

        # Layer 3 is the softmax layer: 500 inputs (the hidden layer's output
        # size) and 10 outputs (MNIST has 10 classes).
        self.layer3 = OutputLayer(n_in=500, n_out=10)

    def negative_log_likelihood(self, y):
        self.layer3.input = self.layer2.a
        self.layer3.y = y
        # Refresh the regularization terms so the cost function is computed
        # with the current weights.
        self.layer3.L2_sqr = (self.layer2.W ** 2).sum() + (self.layer3.W ** 2).sum()
        self.layer3.L1 = abs(self.layer2.W).sum() + abs(self.layer3.W).sum()
        return self.layer3.negative_log_likelihood()

    def errors(self, y):
        self.layer3.input = self.layer2.a
        self.layer3.y = y
        return self.layer3.errors()

    # Forward propagation: compute the output of each layer in turn.
    # The hidden layer uses tanh as its activation function;
    # the output layer uses softmax.
    def feedforward(self, input):
        # 1. Forward-propagate through layer 0.
        self.layer0.forward_convolution(input)
        # self.layer0.forward_convolution_fft(input)
        # tanh activation; pooling could be done before tanh to save
        # computation -- a possible improvement.
        self.layer0.a_before_pooling = numpy.tanh(self.layer0.z)
        self.layer0.feed_forward_pooling()
        # self.layer0.feed_forward_pooling_fft()
        # self.layer0.a now holds the pooling layer's output.

        # 2. Forward-propagate through layer 1.
        self.layer1.forward_convolution(self.layer0.a)  # (20, 20, 12, 12)
        # self.layer1.forward_convolution_fft(self.layer0.a)
        self.layer1.a_before_pooling = numpy.tanh(self.layer1.z)  # same possible improvement as above
        self.layer1.feed_forward_pooling()
        # self.layer1.feed_forward_pooling_fft()
        # self.layer1.a: (20, 50, 4, 4)

        # 3. Compute the hidden layer's output.
        # The conv + pooling output must first be flattened per sample before
        # it can serve as the hidden layer's input (flatten uses C order; see
        # flatten_feature_maps below).
        layer2_input = numpy.zeros((self.layer1.a.shape[0],
                                    self.layer1.a.shape[1] * self.layer1.a.shape[2] * self.layer1.a.shape[3]))
        i = 0
        while i < self.layer1.a.shape[0]:
            layer2_input[i, :] = self.layer1.a[i, :].flatten()
            i += 1
        self.layer2.forward_compute_z_a(layer2_input)  # (20, 800)

        # 4. Compute the output of the final softmax layer: its input is the
        # last hidden layer's output, from which p_y_given_x is computed.
        self.layer3.forward_compute_p_y_given_x(self.layer2.a)  # (20, 10)
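
    # The per-sample flatten loop above (and the one in back_propogation)
    # raised the question of flattening order: numpy's flatten defaults to
    # C (row-major) order, so each sample unrolls as (channel, row, column).
    # A minimal equivalent sketch (this helper is an addition for
    # illustration, not part of the original code):
    @staticmethod
    def flatten_feature_maps(a):
        # a: (batch, channels, height, width) -> (batch, channels*height*width),
        # identical to assigning a[i, :].flatten() row by row.
        return a.reshape(a.shape[0], -1)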
    def back_propogation(self, x, y, learning_rate, L2_reg):
        # 1. Compute the delta of the last layer (the output layer).
        self.layer3.back_compute_delta(y)
        # Save the output layer's W and delta before W is updated.
        next_W = self.layer3.W          # (500, 10)
        next_delta = self.layer3.delta  # (20, 10)

        # 2. Compute the delta of the hidden layer.
        self.layer2.back_delta(next_W, next_delta)
        next_W = self.layer2.W          # (800, 500)
        next_delta = self.layer2.delta  # (20, 500)

        # 3. Compute the delta of the last conv + pooling layer.
        # Note: there is no W between the hidden layer and the pooling layer,
        # but the delta still has to be propagated through.
        self.layer1.compute_pooling_layer_delta_from_hidden_layer(next_W, next_delta)
        next_delta = self.layer1.delta_pooling  # (20, 50, 4, 4)
        self.layer1.compute_conv_delta_from_pooling_layer(next_delta)
        # self.layer1.compute_conv_delta_from_pooling_layer_fft(next_delta)
        next_W = self.layer1.W               # (50, 20, 5, 5)
        next_delta = self.layer1.delta_conv  # (20, 50, 8, 8)

        # 4. Compute the delta of the first conv + pooling layer.
        self.layer0.compute_pool_layer_delta_from_conv_layer(next_delta, next_W)
        # self.layer0.compute_pool_layer_delta_from_conv_layer_fft(next_delta, next_W)
        next_delta = self.layer0.delta_pooling  # (20, 20, 12, 12)
        self.layer0.compute_conv_delta_from_pooling_layer(next_delta)
        # self.layer0.compute_conv_delta_from_pooling_layer_fft(next_delta)
        next_W = self.layer0.W               # (20, 1, 5, 5)
        next_delta = self.layer0.delta_conv  # (20, 20, 24, 24)

        # With all deltas in place, update W and b layer by layer.
        self.layer0.update_W_b(n_samples=x.shape[0], x=x,
                               learning_rate=learning_rate, L2_reg=L2_reg)
        # self.layer0.update_W_b_fft(n_samples=x.shape[0], x=x, learning_rate=learning_rate, L2_reg=L2_reg)
        self.layer1.update_W_b(n_samples=x.shape[0], x=self.layer0.a,
                               learning_rate=learning_rate, L2_reg=L2_reg)
        # self.layer1.update_W_b_fft(n_samples=x.shape[0], x=self.layer0.a, learning_rate=learning_rate, L2_reg=L2_reg)
        # Flatten layer1's output again to form the hidden layer's input.
        layer2_input = numpy.zeros((self.layer1.a.shape[0],
                                    self.layer1.a.shape[1] * self.layer1.a.shape[2] * self.layer1.a.shape[3]))
        i = 0
        while i < self.layer1.a.shape[0]:
            layer2_input[i, :] = self.layer1.a[i, :].flatten()
            i += 1
        self.layer2.back_update_w_b(a=layer2_input, learning_rate=learning_rate, L2_reg=L2_reg)
        # Update the last (output) layer's W and b.
        self.layer3.back_update_w_b(self.layer2.a, learning_rate=learning_rate, L2_reg=L2_reg)
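
# A minimal usage sketch (an addition, not part of the original file). The
# hyperparameters and the stand-in random batch are assumptions; a real run
# would load MNIST images of shape (n, 1, 28, 28) with integer labels instead.
if __name__ == "__main__":
    rng = numpy.random.RandomState(1234)
    net = cnn(rng, nkerns=[20, 50], batch_size=20)

    x = rng.uniform(low=-1.0, high=1.0, size=(20, 1, 28, 28))  # stand-in image batch
    y = rng.randint(0, 10, size=20)                            # stand-in labels

    # One training step: forward pass, cost, then backprop with weight updates.
    net.feedforward(x)
    cost = net.negative_log_likelihood(y)
    err = net.errors(y)
    net.back_propogation(x, y, learning_rate=0.1, L2_reg=0.0001)
    print("cost: %s, error rate: %s" % (cost, err))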