def backward(self, dout):
    """Propagate the upstream gradient back to this layer's input.

    ``dout`` is rearranged into a single row, multiplied element-wise
    (with broadcasting) against ``self.W``, reshaped into column form and
    folded back to ``self.input_shape`` by ``col2im``.

    The shapes quoted below are the original author's annotations; they
    are not validated here.

    Args:
        dout: upstream gradient, annotated as [BS, in_D, out_H, out_W].

    Returns:
        Whatever ``col2im`` produces for ``self.input_shape``; annotated
        as [BS, in_D, in_H, in_W].
    """
    # Bring the channel axis last and flatten everything else:
    # (-1, in_D).
    channel_last = dout.transpose((2, 3, 0, 1)).reshape((-1, self.in_D))

    # Transpose, then flatten in column-major order into one row;
    # annotated as [out_H * out_W * BS * in_D].
    grad_row = channel_last.T.reshape((1, -1), order='F')

    # Broadcasted element-wise product with the weights; annotated as
    # [f_H*f_W, out_H*out_W*BS*in_D].
    weighted = self.W * grad_row

    # Column-major reshape into the layout col2im is given; annotated as
    # [f_H*f_W*in_D, out_H*out_W*BS].
    col_rows = self.f_H * self.f_W * self.in_D
    din_col = weighted.reshape((col_rows, -1), order='F')

    filter_size = [self.f_H, self.f_W]
    stride = [self.s_H, self.s_W]
    padding = [self.p_H, self.p_W]
    return col2im(din_col, self.input_shape, filter_size, stride, padding)
def back_propagate(self, dout):
    """Propagate the upstream gradient back to this layer's input.

    ``dout`` is moved to batch-last order and reshaped to
    ``(in_D, 1, out_H*out_W*BS)``, multiplied element-wise (with
    broadcasting) by ``self.iomat``, flattened into column form and folded
    back to ``self.input_shape`` by ``col2im``.

    Args:
        dout: upstream gradient; the original annotation gives its shape
            as (BS, in_D, out_H, out_W).

    Returns:
        Whatever ``col2im`` produces for ``self.input_shape``.
    """
    n_positions = self.out_H * self.out_W * self.BS

    # Batch axis goes last, then every channel keeps one flat row of
    # positions behind a singleton axis used for broadcasting.
    grad = dout.transpose(1, 2, 3, 0).reshape(self.in_D, 1, n_positions)

    weighted = self.iomat * grad
    din_col = weighted.reshape(self.in_D * self.f_H * self.f_W, n_positions)

    filter_size = [self.f_H, self.f_W]
    stride = [self.stride_H, self.stride_W]
    padding = [self.pad_H, self.pad_W]
    return col2im(din_col, self.input_shape, filter_size, stride, padding)
def backward(self, dout):
    """Compute bias, weight and input gradients for this layer.

    Stores the bias gradient in ``self.db`` and the weight gradient in
    ``self.dw``, then returns the input gradient folded back to
    ``self.input_shape`` by ``col2im``.

    The shapes quoted below are the original author's annotations; they
    are not validated here.

    Args:
        dout: upstream gradient, annotated as [BS, out_D, out_H, out_W].

    Returns:
        The input gradient from ``col2im``; annotated as
        [BS, in_D, in_H, in_W].
    """
    # Bias gradient: sum over every axis except axis 1 -> (out_D, 1).
    bias_grad = np.sum(dout, axis=(0, 2, 3))
    self.db = bias_grad.reshape((self.out_D, 1))

    # Batch axis last, then one row per output channel:
    # (out_D, out_H*out_W*BS).
    batch_last = np.transpose(dout, (1, 2, 3, 0))
    grad_mat = batch_last.reshape((self.out_D, -1))

    # Weight gradient against the cached input columns; annotated as
    # [out_D, in_D*f_H*f_W].
    self.dw = np.matmul(grad_mat, self.in_col.T)

    # Input gradient in column form; annotated as
    # [f_H*f_W*in_D, out_H*out_W*BS].
    din_col = np.matmul(self.w_col.T, grad_mat)

    filter_size = [self.f_H, self.f_W]
    stride = [self.s_H, self.s_W]
    padding = [self.p_H, self.p_W]
    return col2im(din_col, self.input_shape, filter_size, stride, padding)
def max_pooling_backward(x, dout, pool_params):
    """Route the upstream gradient to the max element of each pooling window.

    Args:
        x: 4-D input of the forward pass; its shape is unpacked as
            (H, W, D, N).
        dout: upstream gradient; flattened with ``ravel()`` and written,
            one value per im2col column, at that column's argmax row.
        pool_params: dict providing 'HF', 'WF', 'pad' and 'stride'.

    Returns:
        A one-element list ``[dx]`` where ``dx`` has the shape of ``x``.
    """
    # Unpacking four names also enforces that x is 4-D.
    height, width, _depth, _batch = x.shape

    # Merge the last two axes behind a singleton axis before im2col.
    merged = x.reshape(height, width, 1, -1)

    window_h = pool_params['HF']
    window_w = pool_params['WF']
    pad = pool_params['pad']
    stride = pool_params['stride']

    cols = im2col(merged, window_h, window_w, pad, stride)

    # Row index of the maximum in every column.
    winner_rows = np.argmax(cols, axis=0)
    column_ids = np.arange(cols.shape[1])

    grad_cols = np.zeros_like(cols)
    grad_cols[winner_rows, column_ids] = dout.ravel()

    folded = col2im(grad_cols, merged.shape, window_h, window_w, pad,
                    stride=stride)
    return [folded.reshape(x.shape)]
def conv_backward(x, w, b, conv_param, dout):
    """Compute input, weight and bias gradients of a convolution.

    Args:
        x: input of the forward pass, handed to ``im2col`` unchanged.
        w: 4-D filter bank whose shape is unpacked as (HF, WF, DF, NF).
        b: bias; not read by this function.
        conv_param: dict providing 'pad' and 'stride'.
        dout: 4-D upstream gradient; its axis 2 is treated as the filter
            axis (it is summed over axes 0, 1 and 3 for ``db``).

    Returns:
        The list ``[dx, dw, db]``; ``dx`` is folded to ``x.shape`` by
        ``col2im`` and ``dw`` is returned in (HF, WF, DF, NF) order.
    """
    filt_h, filt_w, filt_d, n_filt = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    x_cols = im2col(x, filt_h, filt_w, pad, stride)

    # One flattened filter per row: (NF, HF*WF*DF).
    w_rows = w.transpose(3, 0, 1, 2).reshape((n_filt, -1))

    # Bias gradient: sum over everything except the filter axis.
    db = np.sum(dout, axis=(0, 1, 3))

    # Filter axis first, then one row per filter matching x_cols' columns.
    grad_mat = dout.transpose(2, 0, 1, 3).reshape(
        (w_rows.shape[0], x_cols.shape[-1]))

    dx_cols = w_rows.T.dot(grad_mat)
    dw_rows = grad_mat.dot(x_cols.T)

    dx = col2im(dx_cols, x.shape, filt_h, filt_w, pad, stride)

    # Unflatten each row and move the filter axis back to the end.
    dw = dw_rows.reshape(
        (dw_rows.shape[0], filt_h, filt_w, filt_d)).transpose(1, 2, 3, 0)
    return [dx, dw, db]
def back_propagate(self, dout):
    """Compute bias, weight and input gradients for this layer.

    Stores the bias gradient in ``self.db`` and the column-form weight
    gradient in ``self.dW_col``, then returns the input gradient folded
    back to ``self.input_shape`` by ``col2im``.

    Args:
        dout: upstream gradient; the original annotation gives its shape
            as (BS, out_D, out_H, out_W).

    Returns:
        The input gradient produced by ``col2im``.
    """
    # Bias gradient: sum over every axis except axis 1 -> (out_D, 1).
    self.db = np.sum(dout, axis=(0, 2, 3)).reshape(self.out_D, 1)

    # Batch axis last, one row per output channel:
    # (out_D, out_H*out_W*BS).
    batch_last = dout.transpose(1, 2, 3, 0)
    grad_mat = batch_last.reshape(self.out_D, -1)

    # Weight gradient against the cached input columns; annotated as
    # (out_D, f_H*f_W*in_D).
    self.dW_col = np.matmul(grad_mat, self.X_col.T)

    # Input gradient in column form; annotated as
    # (f_H*f_W*in_D, out_H*out_W*BS).
    din_col = np.matmul(self.W_col.T, grad_mat)

    filter_size = [self.f_H, self.f_W]
    stride = [self.stride_H, self.stride_W]
    padding = [self.pad_H, self.pad_W]
    return col2im(din_col, self.input_shape, filter_size, stride, padding)
def max_pooling_backward(x, dout, pool_params):
    """Route the upstream gradient to the max element of each pooling window.

    Emits the same debug trace as before, but through single-argument
    ``print(...)`` calls, which produce identical text on Python 2 and
    Python 3 (the former ``print`` statements were a SyntaxError on 3).

    Args:
        x: 4-D input of the forward pass; its shape is unpacked as
            (H, W, D, N).
        dout: upstream gradient; flattened with ``ravel()`` and written,
            one value per im2col column, at that column's argmax row.
        pool_params: dict providing 'HF', 'WF', 'pad' and 'stride'.

    Returns:
        A one-element list ``[dx]`` where ``dx`` has the shape of ``x``.
    """
    print("in max_pooling_backward")
    print("dout.shape %s" % (dout.shape,))
    print("x.shape %s" % (x.shape,))

    # Unpacking four names also enforces that x is 4-D.
    H, W, _, _ = x.shape

    # Merge the last two axes behind a singleton axis before im2col.
    x_reshaped = x.reshape(H, W, 1, -1)
    x_col = im2col(x_reshaped, pool_params['HF'], pool_params['WF'],
                   pool_params['pad'], pool_params['stride'])

    # Row index of the maximum in every column.
    x_col_argmax = np.argmax(x_col, axis=0)

    # Zero matrix with the same dimensions as x_col.
    dx_col = np.zeros_like(x_col)
    print(" 1 dx_col.shape %s" % (dx_col.shape,))

    # Flatten dout and drop one value onto each column's max position.
    dx_col[x_col_argmax, np.arange(x_col.shape[1])] = dout.ravel()
    print(" 2 dx_col.shape %s" % (dx_col.shape,))

    dx_shaped = col2im(dx_col, x_reshaped.shape, pool_params['HF'],
                       pool_params['WF'], pool_params['pad'],
                       stride=pool_params['stride'])
    dx = dx_shaped.reshape(x.shape)
    return [dx]
def conv_backward(x, w, b, conv_param, dout):
    """Compute input, weight and bias gradients of a convolution.

    Emits the same debug trace as before, but through single-argument
    ``print(...)`` calls, which produce identical text on Python 2 and
    Python 3 (the former ``print`` statements were a SyntaxError on 3).

    Args:
        x: input of the forward pass, handed to ``im2col`` unchanged.
        w: 4-D filter bank whose shape is unpacked as (HF, WF, DF, NF).
        b: bias; not read by this function.
        conv_param: dict providing 'pad' and 'stride'.
        dout: 4-D upstream gradient; its axis 2 is treated as the filter
            axis (it is summed over axes 0, 1 and 3 for ``db``).

    Returns:
        The list ``[dx, dw, db]``; ``dx`` is folded to ``x.shape`` by
        ``col2im`` and ``dw`` is returned in (HF, WF, DF, NF) order.
    """
    print(" in conv_backward")
    HF, WF, DF, NF = w.shape
    print("dout.shape %s" % (dout.shape,))

    # After conversion the original annotation gives
    # (HF*WF*DF, N*Hout*Wout).
    x_col = im2col(x, HF, WF, conv_param['pad'], conv_param['stride'])
    print("x_col.shape %s" % (x_col.shape,))

    # Each row is one convolution kernel: (NF, HF, WF, DF) flattened to
    # (NF, HF*WF*DF).
    w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))

    # Bias gradient: sum over everything except the filter axis.
    db = np.sum(dout, axis=(0, 1, 3))

    # Filter axis first; annotated as (NF, Hout, Wout, N) ...
    dout = dout.transpose(2, 0, 1, 3)
    # ... then flattened to (NF, N*Hout*Wout).
    dout = dout.reshape((w_col.shape[0], x_col.shape[-1]))

    # Gradient w.r.t. the layer input in column form; same dimensions as
    # x_col.
    dx_col = w_col.T.dot(dout)
    # Gradient w.r.t. the convolution kernels, one row per kernel.
    dw_col = dout.dot(x_col.T)

    dx = col2im(dx_col, x.shape, HF, WF, conv_param['pad'],
                conv_param['stride'])

    # Unflatten each row and move the filter axis back to the end.
    dw = dw_col.reshape((dw_col.shape[0], HF, WF, DF))
    dw = dw.transpose(1, 2, 3, 0)
    return [dx, dw, db]
def decompose_filter(parent_filter_wt, filters=16):
    """Factor a parent filter into two filters by regularised ALS.

    The parent weight is zero-padded spatially to ``2k - 1`` and in both
    leading axes, flattened to ``output_col``, and approximated as
    ``img_col . kernel_col`` with alternating ridge-regularised least
    squares (at most 10 sweeps, stopping early once the residual norm
    drops below ``1e-7``). Progress is printed to stdout.

    Args:
        parent_filter_wt: 4-D torch tensor. Axis 0 is read as ``c2``,
            axis 1 as ``c1`` and axis 2 as the kernel size ``k``.
            NOTE(review): presumably laid out (out_ch, in_ch, k, k) with
            a square kernel - confirm against the caller.
        filters: size of the intermediate filter axis. The zero padding
            below needs ``filters >= c2 >= c1``; otherwise ``np.zeros``
            is asked for a negative dimension and raises ValueError.

    Returns:
        Tuple ``(kernel, img_calculated)``. ``kernel`` has shape
        ``(filters, c1, k, k)``. ``img_calculated`` is the result of
        ``im2col.col2im`` for input shape ``(c2, filters, k, k)``,
        divided by ``k * k``.
    """
    lamda = 0.0001   # ridge term added to both normal-equation systems
    error = 1e-7     # early-stop threshold on the residual norm

    c1 = parent_filter_wt.shape[1]
    c2 = parent_filter_wt.shape[0]
    k = parent_filter_wt.shape[2]
    k1 = k
    k2 = k
    k_expanded = k1 + k2 - 1

    # Floor division keeps the pad width an int on Python 3 as well.
    pad_zero = nn.ZeroPad2d((k_expanded - k) // 2)
    new_weight = pad_zero(parent_filter_wt)
    new_weight = new_weight.cpu().numpy()

    # output is the original parent filter, viewed as the result of
    # convolving an image (= img_col) with a filter (= kernel).
    # output_col is its 2-D form used for the matrix products below.
    output = np.concatenate(
        (new_weight, np.zeros((c2, c2 - c1, k_expanded, k_expanded))),
        axis=1)
    output = np.concatenate(
        (output, np.zeros((filters - c2, c2, k_expanded, k_expanded))),
        axis=0)
    output_col = output.reshape(filters, -1).T

    # kernel plays the role of filter f1, which convolves the image
    # (= img_col). It starts as small random noise, zero-padded along
    # axis 1 up to `filters`.
    kernel = np.random.normal(0, 1e-3, size=(filters, c1, k1, k1))
    kernel = np.concatenate(
        (kernel, np.zeros((filters, filters - c1, k1, k1))), axis=1)
    kernel_col = kernel.reshape(filters, -1).T

    # img is the f2 filter treated as the image convolved by f1.
    # img_col is its 2-D (im2col) form for matrix multiplication.
    img = np.random.normal(0, 1e-3, size=(c2, filters, k2, k2))
    img_col = im2col.im2col(img, k1, k1, stride=1, padding=k_expanded - k)

    print(kernel_col.shape)
    print(img_col.shape)
    print(output_col.shape)
    residual = np.linalg.norm(np.dot(img_col, kernel_col) - output_col)
    print('before calculating prod:  %s' % residual)

    # Alternating least squares: solve for img_col with kernel_col fixed,
    # then for kernel_col with img_col fixed.
    for _ in range(10):
        img_col = np.linalg.solve(
            np.dot(kernel_col, kernel_col.T)
            + lamda * np.eye(kernel_col.shape[0]),
            np.dot(kernel_col, output_col.T)).T

        kernel_col = np.linalg.solve(
            img_col.T.dot(img_col) + lamda * np.eye(img_col.shape[1]),
            np.dot(img_col.T, output_col))

        residual = np.linalg.norm(np.dot(img_col, kernel_col) - output_col)
        print(residual)
        if residual < error:
            break

    # Keep the ALS solution to compare with the re-expanded image below.
    x1 = img_col

    new_prod = np.dot(img_col, kernel_col)
    print('after calculating prod:  %s' % np.linalg.norm(new_prod - output_col))

    kernel = kernel_col.T.reshape(filters, filters, k1, k1)
    kernel = kernel[:, :c1, ...]

    print('diff pad %s' % (k_expanded - k))
    img_calculated = im2col.col2im(col=img_col,
                                   input_shape=(c2, filters, k2, k2),
                                   filter_h=k1, filter_w=k1,
                                   padding=k_expanded - k)

    # With stride 1 and padding k - 1 every element of the k x k image
    # falls inside k windows per axis, so col2im adds it k * k times
    # (9 for k == 3). Divide to average the overlapping contributions.
    img_calculated = img_calculated / float(k1 * k1)

    print('image error:  %s' % np.linalg.norm(img_calculated - img))

    # Re-expand the folded image to see how far the fold/unfold round trip
    # moved it from the ALS solution.
    img_col2 = im2col.im2col(img_calculated, k1, k1, stride=1,
                             padding=k_expanded - k)
    print('after converting, product error =  %s'
          % np.linalg.norm(img_col2 - x1))

    print(parent_filter_wt.shape)
    print(kernel.shape)
    print(img_calculated.shape)

    return kernel, img_calculated