def forward(self, x):
    """Convolve a batch with this layer's filters using im2col.

    The column matrix is stored on ``self.x_cols`` (presumably for reuse in
    the backward pass; it is not read again in this method).

    Args:
        x: 4-D input array, unpacked here as ``(N, C, H, W)``.

    Returns:
        Array of shape ``(N, num_filters, out_h, out_w)``.
    """
    k = self.kernel_size
    self.x_cols = im2col_cython(x, k, k, self.pad, self.stride)
    res = self.weights.dot(self.x_cols) + self.bias.reshape(-1, 1)

    n, _, height, width = x.shape
    # Spatial output size from the standard convolution formula. Reshaping to
    # the *input* H x W (as this method used to) only works when pad/stride
    # happen to preserve the spatial size; for those settings the values
    # below are identical to H and W.
    out_h = (height + 2 * self.pad - k) // self.stride + 1
    out_w = (width + 2 * self.pad - k) // self.stride + 1

    # assumes im2col_cython orders its columns as (out_h, out_w, N), which is
    # what the reshape/transpose pair below relies on -- TODO confirm.
    out = res.reshape(self.num_filters, out_h, out_w, n)
    return out.transpose(3, 0, 1, 2)
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.

    Args:
        x: input array, unpacked as (N, C, H, W).
        w: filter array, unpacked as
            (num_filters, _, filter_height, filter_width).
        b: bias array; reshaped to a column and added to every output column.
        conv_param: dict providing the keys 'stride' and 'pad'.

    Returns:
        Tuple (out, cache) where out has shape
        (N, num_filters, out_height, out_width) and cache is
        (x, w, b, conv_param, x_cols).

    Raises:
        AssertionError: if the padded input size minus the filter size is not
            a multiple of the stride along either spatial axis.
    """
    N, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Output size. No zero-filled output array is allocated here any more:
    # it was only ever used to read these two numbers back from its shape.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col_cython(x, filter_height, filter_width, pad, stride)
    res = w.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    # assumes im2col_cython lays columns out as (out_height, out_width, N)
    # -- TODO confirm against the Cython source.
    out = res.reshape(num_filters, out_height, out_width, N)
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache
def conv_forward_im2col(x, w, b, conv_param):
    """
    A fast implementation of the forward pass for a convolutional layer
    based on im2col and col2im.

    Args:
        x: input array, unpacked as (N, C, H, W).
        w: filter array, unpacked as
            (num_filters, _, filter_height, filter_width).
        b: bias array; reshaped to a column and added to every output column.
        conv_param: dict providing the keys 'stride' and 'pad'.

    Returns:
        Tuple (out, cache) where out has shape
        (N, num_filters, out_height, out_width) and cache is
        (x, w, b, conv_param, x_cols).

    Raises:
        AssertionError: if the padded input size minus the filter size is not
            a multiple of the stride along either spatial axis.
    """
    N, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Floor division keeps the sizes integral. With true division ("/") they
    # become floats on Python 3 and NumPy refuses them as array dimensions.
    # The asserts above guarantee the division is exact.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col_cython(x, filter_height, filter_width, pad, stride)
    res = w.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    # assumes im2col_cython lays columns out as (out_height, out_width, N)
    # -- TODO confirm against the Cython source.
    out = res.reshape(num_filters, out_height, out_width, N)
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache
def max_pool_forward_im2col(x, pool_param):
    '''
    An implementation of the forward pass for max pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible.

    Args:
        x: input array, unpacked as (N, C, H, W).
        pool_param: dict providing 'pool_height', 'pool_width' and 'stride'.

    Returns:
        Tuple (out, cache) where out has shape (N, C, out_height, out_width)
        and cache is (x.shape, x_cols, x_cols_argmax, pool_param).

    Raises:
        AssertionError: if the pooling window does not tile the input with
            the given stride along either spatial axis.
    '''
    N, C, H, W = x.shape
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # Floor division: "/" yields floats on Python 3, which reshape() rejects.
    # The asserts above guarantee the division is exact.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Fold batch and channel together so every channel is pooled on its own
    # as a one-channel image.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col_cython(
        x_split, pool_height, pool_width, padding=0, stride=stride)

    # Row index of the maximum in each column, then gather those maxima.
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x.shape, x_cols, x_cols_argmax, pool_param)
    return out, cache
def avg_pool_forward_im2col(x, pool_param):
    '''
    An implementation of the forward pass for avg pooling based on im2col.

    This isn't much faster than the naive version, so it should be avoided if
    possible. Possibly bogus if used w/o square pooling regions that tile the
    input.

    Args:
        x: input array, unpacked as (N, C, H, W).
        pool_param: dict; the keys 'pool_width' and 'stride' are read.

    Returns:
        Tuple (out, cache) where out has shape (N, C, out_height, out_width)
        and cache is (x.shape, x_cols, pool_param).

    Raises:
        AssertionError: if the pooling window does not tile the input with
            the given stride along either spatial axis.
    '''
    N, C, H, W = x.shape
    # NOTE(review): the window height is taken from 'pool_width', so any
    # 'pool_height' entry is ignored and the window is always square. That
    # looks like a typo, but the matching backward pass is not visible here,
    # so the behaviour is kept as-is -- confirm before changing.
    pool_height, pool_width = pool_param['pool_width'], pool_param['pool_width']
    stride = pool_param['stride']

    assert (
        H - pool_height) % stride == 0 or H == pool_height, 'Invalid height'
    assert (W - pool_width) % stride == 0 or W == pool_width, 'Invalid width'

    # Integer floor division replaces np.floor(... / stride + 1): np.floor
    # returns a float, which reshape() does not accept as a dimension.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Fold batch and channel together so every channel is pooled on its own
    # as a one-channel image.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col_cython(
        x_split, pool_height, pool_width, padding=0, stride=stride)
    x_cols_avg = np.mean(x_cols, axis=0)
    out = x_cols_avg.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x.shape, x_cols, pool_param)
    return out, cache
def forward_conv(imgs, filters, stride, pad):
    """Convolve a batch of images with a filter bank using im2col.

    Args:
        imgs: image batch, unpacked as (N, C, H, W).
        filters: 4-D filter array whose first two axes are swapped before
            use, so the number of output filters is read from axis 1.
        stride: step between neighbouring filter positions.
        pad: padding forwarded to im2col_cython and used in the size checks.

    Returns:
        Tuple (res, imgs_cols): the convolution result with shape
        (N, num_filters, out_height, out_width) and the im2col matrix of
        imgs (needed again in the backward pass).

    Raises:
        ValueError: if the padded image size minus the filter size is not a
            multiple of the stride along either spatial axis.
    """
    N, _, H, W = imgs.shape

    # flip filters such that the number of outputs is
    # on the first dimension, this makes dot products
    # with the column matrix we get from im2col easier
    w = np.transpose(filters, axes=(1, 0, 2, 3))
    num_filters, _, filter_height, filter_width = w.shape

    # Do some sanity checking on dimensions
    if (W + 2 * pad - filter_width) % stride != 0:
        raise ValueError(
            "Filter width {} does not work for image width {} and padding {}".
            format(filter_width, W, pad))
    if (H + 2 * pad - filter_height) % stride != 0:
        raise ValueError(
            "Filter height {} does not work for image height {} and padding {}"
            .format(filter_height, H, pad))

    # Floor division keeps the sizes integral; "/" yields floats on Python 3
    # and reshape() rejects those. The checks above make the division exact.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    imgs_cols = im2col_cython(imgs, filter_height, filter_width, pad, stride)

    # compute w * imgs_cols
    res = w.reshape((num_filters, -1)).dot(imgs_cols)
    res = res.reshape(num_filters, out_height, out_width, N)
    res = np.transpose(res, axes=(3, 0, 1, 2))

    return res, imgs_cols
def forward(self, x):
    """Convolve a batch with this layer's weights and bias using im2col.

    Stores ``x`` and the im2col matrix on ``self.x`` / ``self.x_cols``
    (presumably for the backward pass; they are not read again here).

    Args:
        x: 4-D input array, unpacked as ``(N, C, H, W)``.

    Returns:
        Array of shape ``(N, num_filters, out_height, out_width)``.
    """
    w = self.w
    b = self.b
    N, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = self.stride, self.pad

    # Floor division keeps the sizes integral. With "/" they are floats on
    # Python 3 and cannot be used as array dimensions.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col_cython(x, filter_height, filter_width, pad, stride)
    res = w.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    # assumes im2col_cython lays columns out as (out_height, out_width, N)
    # -- TODO confirm against the Cython source.
    out = res.reshape(num_filters, out_height, out_width, N)
    out = out.transpose(3, 0, 1, 2)

    self.x = x
    self.x_cols = x_cols
    return out
def forward_max_pool(imgs, pool_height, pool_width, stride):
    """Max-pool a batch of images using im2col.

    Args:
        imgs: image batch, unpacked as (N, C, H, W).
        pool_height: height of the pooling window.
        pool_width: width of the pooling window.
        stride: step between neighbouring windows.

    Returns:
        Tuple (res, imgs_cols, imgs_argmax): the pooled output with shape
        (N, C, out_height, out_width), the im2col matrix, and for each column
        the row index of its maximum.

    Raises:
        ValueError: if the window does not tile the image with the given
            stride along either spatial axis.
    """
    N, C, H, W = imgs.shape

    # Do some sanity checking on dimensions
    if (W - pool_width) % stride != 0:
        raise ValueError(
            "Pool width {} does not work for image width {}".format(
                pool_width, W))
    if (H - pool_height) % stride != 0:
        raise ValueError(
            "Pool height {} does not work for image height {}".format(
                pool_height, H))

    # Floor division keeps the sizes integral; "/" yields floats on Python 3
    # and reshape() rejects those. The checks above make the division exact.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # mangle together batch size and channels for all images
    # this way we can simply do an argmax after calling im2col
    imgs_cols = im2col_cython(imgs.reshape(N * C, 1, H, W), pool_height,
                              pool_width, 0, stride)

    # row index of the maximum of every column, then the maxima themselves
    imgs_argmax = np.argmax(imgs_cols, axis=0)
    imgs_max = imgs_cols[imgs_argmax, np.arange(imgs_cols.shape[1])]

    # the output is simply the selected maxima reshaped; the transpose is
    # needed because the maxima are still in im2col column order
    res = imgs_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)
    return res, imgs_cols, imgs_argmax
def conv(input, W):
    """Convolve a batch with a filter bank (no padding, stride 1) via im2col.

    Args:
        input: 4-D array; axis 0 is the batch and axes 2 and 3 are the
            spatial dimensions.
        W: 4-D filter array, unpacked as (c_out, c_in, k_x, k_y).

    Returns:
        Array of shape (N, c_out, h_out, w_out) with
        h_out = h_in - k_x + 1 and w_out = w_in - k_y + 1.
    """
    c_out, c_in, k_x, k_y = W.shape
    N = input.shape[0]
    h_out = input.shape[2] - k_x + 1
    w_out = input.shape[3] - k_y + 1

    # One flattened filter per row: c_out x (c_in * k_x * k_y).
    flat_filters = W.reshape(c_out, c_in * k_x * k_y)
    # Padding 0 and stride 1 are hard-coded, matching the h_out/w_out formula.
    columns = im2col_cython(input, k_x, k_y, 0, 1)
    output = np.dot(flat_filters, columns)

    # A single reshape suffices; the earlier intermediate reshape to
    # (c_out, N, h_out, w_out) did not move any data. The unreachable
    # per-image loop that used to follow the return has been removed.
    # assumes im2col_cython orders its columns as (h_out, w_out, N)
    # -- TODO confirm against the Cython source.
    return output.reshape(c_out, h_out, w_out, N).transpose(3, 0, 1, 2)
def compute_net_in(self):
    '''Computes fast convolution net-in using the im2col algorithm,
    C-compiled via Cython.

    Reads ``self.input`` (unpacked as batch, channels, y, x), ``self.wts``
    (unpacked as kernels, channels, ker_x, ker_y) and ``self.b``. Writes
    ``self.input_cols`` (the im2col matrix) and ``self.net_in`` with shape
    (batch_sz, n_kers, out_y, out_x). Stride is fixed at 1 and the padding is
    ceil((ker_sz - 1) / 2); only the first kernel extent is used, so kernels
    are treated as square.
    '''
    batch_sz, _, img_y, img_x = self.input.shape
    n_kers, _, ker_sz, _ = self.wts.shape
    stride = 1
    pad = int(np.ceil((ker_sz - 1) / 2))

    # Output size implied by this padding at stride 1. For odd kernel sizes
    # this equals the input size (what the reshape used to hard-code); for
    # even sizes the output is one pixel larger, and reshaping to the input
    # size raised an error.
    out_y = img_y + 2 * pad - ker_sz + 1
    out_x = img_x + 2 * pad - ker_sz + 1

    self.input_cols = im2col_cython.im2col_cython(self.input, ker_sz, ker_sz,
                                                  pad, stride)
    flat_wts = self.wts.reshape(n_kers, -1)
    self.net_in = flat_wts @ self.input_cols + self.b.reshape(-1, 1)

    # assumes im2col_cython orders its columns as (out_y, out_x, batch)
    # -- TODO confirm against the Cython source.
    self.net_in = self.net_in.reshape(n_kers, out_y, out_x, batch_sz)
    self.net_in = self.net_in.transpose(3, 0, 1, 2)
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    """Backward pass of the binarized-weight convolution.

    Computes the gradient with respect to the input (through ``self.wbin``)
    and with respect to the real-valued weights (via ``self.update_grad``),
    then writes them to ``in_grad[0]`` and ``in_grad[1]``.

    Args:
        req: write-request modes, one per entry of ``in_grad``.
        out_grad: list whose first entry is the upstream gradient, laid out
            as (x_n, f_n, out_h, out_w).
        in_data: list holding the input (index 0) and the real-valued
            weights (index 1).
        out_data: unused.
        in_grad: list receiving the input gradient (index 0) and the weight
            gradient (index 1).
        aux: unused.
    """
    stride, pad = self.stride, self.pad
    x = in_data[0].asnumpy()
    w_real = in_data[1].asnumpy()
    x_n, x_d, x_h, x_w = x.shape
    f_n, _, f_h, f_w = w_real.shape

    # convert to columns -- height before width, the same order that
    # col2im_cython receives below (they used to be swapped here, which only
    # matters for non-square filters)
    x_cols = im2col_cython(x, f_h, f_w, pad[0], stride[0])

    dout = out_grad[0].asnumpy()  # (x_n, f_n, out_h, out_w)
    dout_ = dout.transpose(1, 2, 3, 0).reshape(f_n, -1)

    # gradient w.r.t. the binarized weights
    dwbin = dout_.dot(x_cols.T).reshape(self.wbin.shape)

    # gradient w.r.t. the input, folded back from column form
    dx_cols = self.wbin.reshape(f_n, -1).T.dot(dout_)
    dx = col2im_cython(dx_cols, x_n, x_d, x_h, x_w, f_h, f_w, pad[0],
                       stride[0])

    # update gradient
    dw = self.update_grad(w_real, dwbin)

    self.assign(in_grad[0], req[0], mx.nd.array(dx))
    # the weight gradient must honour its own request mode, not the input's
    self.assign(in_grad[1], req[1], mx.nd.array(dw))
def forward_max_pool(imgs, pool_height, pool_width, stride):
    """Max-pool a batch of images using im2col.

    Args:
        imgs: image batch, unpacked as (N, C, H, W).
        pool_height: height of the pooling window.
        pool_width: width of the pooling window.
        stride: step between neighbouring windows.

    Returns:
        Tuple (res, imgs_cols, imgs_argmax): the pooled output with shape
        (N, C, out_height, out_width), the im2col matrix, and for each column
        the row index of its maximum.

    Raises:
        ValueError: if the window does not tile the image with the given
            stride along either spatial axis.
    """
    N, C, H, W = imgs.shape

    # Do some sanity checking on dimensions
    if (W - pool_width) % stride != 0:
        raise ValueError("Pool width {} does not work for image width {}".format(pool_width, W))
    if (H - pool_height) % stride != 0:
        raise ValueError("Pool height {} does not work for image height {}".format(pool_height, H))

    # Integer division: true division would hand float sizes to reshape() on
    # Python 3. The checks above guarantee there is no remainder.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # mangle together batch size and channels for all images
    # this way we can simply do an argmax after calling im2col
    imgs_cols = im2col_cython(imgs.reshape(N * C, 1, H, W), pool_height, pool_width, 0, stride)

    # compute maximum
    imgs_argmax = np.argmax(imgs_cols, axis=0)
    # get maximum values
    imgs_max = imgs_cols[imgs_argmax, np.arange(imgs_cols.shape[1])]

    # the output will simply be the selected maximum values reshaped
    # we have to do some transposes here since the maxima are currently
    # in im2col format
    res = imgs_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)
    return res, imgs_cols, imgs_argmax
def forward(self, is_train, req, in_data, out_data, aux):
    """Forward pass of the binarized-weight convolution (no bias term).

    Args:
        is_train: unused.
        req: write-request modes; ``req[0]`` is used for the output.
        in_data: list holding the input (index 0) and the real-valued
            weights (index 1).
        out_data: list whose first entry receives the result, shaped
            (x_n, f_n, out_h, out_w).
        aux: unused.

    Raises:
        AssertionError: if the padded input size minus the filter size is not
            a multiple of the stride along either spatial axis.
    """
    x = in_data[0].asnumpy()
    w = in_data[1].asnumpy()
    x_n, _, x_h, x_w = x.shape
    f_n, _, f_h, f_w = w.shape

    # im2col_cython accepts one scalar padding and one scalar stride, so only
    # the first entry of each tuple is ever applied -- to both spatial axes.
    # The size checks and output sizes use those same values so they always
    # agree with the column matrix (the [0]/[1] indices used to be mixed).
    p, s = self.pad[0], self.stride[0]

    # check dimensions
    assert (x_w + 2 * p - f_w) % s == 0, 'width does not work'
    assert (x_h + 2 * p - f_h) % s == 0, 'height does not work'

    # Floor division keeps the sizes integral; "/" yields floats on Python 3
    # and reshape() rejects those.
    out_h = (x_h + 2 * p - f_h) // s + 1
    out_w = (x_w + 2 * p - f_w) // s + 1

    # binarize the weight (expected to populate self.wbin, read below)
    self.binarize_weight(w)

    # convert to columns -- height before width (they used to be swapped,
    # which only matters for non-square filters)
    x_cols = im2col_cython(x, f_h, f_w, p, s)
    res = self.wbin.reshape((f_n, -1)).dot(x_cols)

    # assumes im2col_cython orders its columns as (out_h, out_w, x_n)
    # -- TODO confirm against the Cython source.
    out = res.reshape(f_n, out_h, out_w, x_n).transpose(3, 0, 1, 2)
    self.assign(out_data[0], req[0], mx.nd.array(out))
def forward(self, bottom, top):
    """Forward propagation: im2col the input, multiply via ``mult``, store in top.

    The input and the layer weights are each flattened into a 1-D stream
    prefixed with two size values, handed to ``mult(...).hw_value()``
    (presumably an accelerator call, going by the "hw time" label), and the
    returned stream is passed through ``acc8.acc8_func`` before being
    reshaped into ``top[0].data``. CPU time for each stage is printed.

    Args:
        bottom: sequence whose first entry exposes the input as ``.data``.
        top: sequence whose first entry's ``.data`` is overwritten in place.
    """
    # --- input stream -----------------------------------------------------
    tick = time.process_time()
    bottom_mat = np.transpose(
        im2col_cython.im2col_cython(
            bottom[0].data, self.ks, self.ks, self.pad, self.st))
    input_header = (bottom_mat.shape[0], bottom_mat.shape[1])
    bottom_stream = np.append(input_header, bottom_mat.ravel())
    tock = time.process_time()
    print("ravel time")
    print(tock - tick)

    # --- weight stream ----------------------------------------------------
    tick = time.process_time()
    weight_header = (self.num_out, bottom_mat.shape[1])
    weight_stream = np.append(weight_header, self.blobs[0].data.ravel())
    tock = time.process_time()
    print("ravel time")
    print(tock - tick)

    # --- multiplication ---------------------------------------------------
    tick = time.process_time()
    result_stream = mult(bottom_stream, weight_stream).hw_value()
    tock = time.process_time()
    print("hw time")
    print(tock - tick)

    # --- post-processing and reshape --------------------------------------
    tick = time.process_time()
    result_stream = result_stream.astype(float)
    result = np.zeros(self.H * self.W * self.num_out).astype(float)
    # presumably fills `result` in place from `result_stream` -- TODO confirm
    acc8.acc8_func(result_stream, result)
    # reinterpret as rows of num_out values, swap the axes, then flatten
    result = np.reshape(result, (-1, self.num_out)).transpose(1, 0).ravel()
    result = np.reshape(result, top[0].data.shape)
    tock = time.process_time()
    print("reshape time")
    print(tock - tick)

    # check correctness
    print("bottom_mat shape")
    print(bottom_mat.shape)
    # transposed back to the im2col orientation; not used afterwards
    bottom_mat = np.transpose(bottom_mat)

    top[0].data[...] = result