def __call__(self, x, *args, **kwargs):
    """Convolve a single image with the layer's filters.

    Inputs:
    - x: Array-like image. Either 2-D (one channel) or 3-D with the channel
      axis last when ``self.data_format == 'channels_last'`` and first
      otherwise.
    - *args, **kwargs: Accepted for call compatibility and ignored.

    Returns:
    - out: 3-D array of filter responses. The filter axis is last when
      ``self.data_format == 'channels_last'`` and first otherwise.

    Raises:
    - ValueError: if the image is not 2-D/3-D, if its channel count differs
      from ``self.d_filters``, or if filter size, padding and stride do not
      tile the image exactly.
    """
    x = np.array(x)
    channels_last = self.data_format == 'channels_last'

    if x.ndim == 2:
        # A 2-D image has a single channel. Add it as the leading axis and
        # skip the layout transpose: transposing (1, H, W) as if it were
        # (H, W, C) would turn the width into the channel count.
        x = x[np.newaxis]
    elif x.ndim == 3:
        if channels_last:
            x = np.transpose(x, (2, 0, 1))
    else:
        raise ValueError('Image should be either 2d or 3d')

    d_x, h_x, w_x = x.shape
    if d_x != self.d_filters:
        raise ValueError('different number of channels and filter depth')

    h_out = (h_x - self.h_filter + 2 * self.padding) / self.stride + 1
    w_out = (w_x - self.w_filter + 2 * self.padding) / self.stride + 1
    if not h_out.is_integer() or not w_out.is_integer():
        raise ValueError('Invalid output dimension!')
    h_out, w_out = int(h_out), int(w_out)

    X_col = im2col_indices(x, self.h_filter, self.w_filter,
                           padding=self.padding, stride=self.stride)
    out = self.W @ X_col
    out = out.reshape(self.n_filters, h_out, w_out)

    if self.bias is not None:
        for i, b in enumerate(self.bias):
            out[i] += b

    if channels_last:
        out = np.transpose(out, (1, 2, 0))
    return out
def backward(self, X, grad):
    """Back-propagate through the convolution for channels-last tensors.

    Inputs:
    - X: Layer input, unpacked as (N, W_in, H_in, D_in).
    - grad: Upstream gradient, unpacked as (N, W_out, H_out, D_out).

    Returns:
    - dX: Gradient with respect to X, laid out (N, W_in, H_in, D_in).

    Side effects: stores the weight gradient in ``self.dw`` and the bias
    gradient in ``self.db``.
    """
    _, out_w, out_h, out_depth = grad.shape
    batch, in_w, in_h, in_depth = X.shape
    field, pad, step = self.field, self.padding, self.stride

    # (N, W_out, H_out, D_out) -> (D_out, W_out, H_out, N) -> 2-D columns.
    grad_cols = grad.transpose(3, 1, 2, 0).reshape(
        out_depth, batch * out_w * out_h)

    # The im2col helper is fed channels-first data.
    x_cols = im2col_indices(X.transpose(0, 3, 1, 2), field, field, pad, step)

    self.dw = np.dot(grad_cols, x_cols.T)
    self.db = np.sum(grad_cols, axis=1)

    dx_cols = np.dot(grad_cols.T, self.weights).T
    dx_channels_first = col2im_indices(
        dx_cols, (batch, in_depth, in_w, in_h), field, field, pad, step)

    # Move the depth axis back to the end.
    return dx_channels_first.transpose(0, 2, 3, 1)
def max_pool_forward_naive(x, pool_param):
    """
    Forward pass for a max pooling layer, implemented with im2col.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, out_height, out_width)
    - cache: (x, pool_param)

    Raises AssertionError when the pooling window and stride do not tile
    the input exactly.
    """
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # Floor division keeps the sizes integral on Python 3, where `/` yields
    # floats that reshape() rejects. The asserts above make it exact.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat every channel as its own single-channel image so that each
    # im2col column holds exactly one pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_col = im2col.im2col_indices(x_split, field_height=pool_height,
                                  field_width=pool_width, padding=0,
                                  stride=stride)
    out = np.max(x_col, axis=0)
    out = out.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

    cache = (x, pool_param)
    return out, cache
def cnn_forward_pass(X, W, b, stride=1, padding=1):
    """
    Forward pass of a convolutional layer, implemented with im2col.

    Inputs:
    - X: Input data, unpacked as (N, B, R, C): batch, channels, rows, columns
    - W: Filter weights, unpacked as (F, _, HH, WW)
    - b: Biases, indexed as b[:, np.newaxis] (one value per filter)
    - stride: Step between adjacent receptive fields
    - padding: Padding forwarded to im2col_indices

    Returns a tuple of:
    - out: Output data, of shape (N, F, out_H, out_W)
    - cache: (X, W, b, stride, padding, x_cols)

    Raises AssertionError when the filter, padding and stride do not tile
    the input exactly.
    """
    N, _, R, C = X.shape
    F, _, HH, WW = W.shape

    # Dimensionality check (R is the row/height axis, C the column/width axis)
    assert (R + 2 * padding - HH) % stride == 0, \
        'height doesn\'t work with current parameter setting'
    assert (C + 2 * padding - WW) % stride == 0, \
        'width doesn\'t work with current parameter setting'

    # Floor division keeps the sizes integral on Python 3; the asserts above
    # guarantee the division is exact.
    out_H = (R + 2 * padding - HH) // stride + 1
    out_W = (C + 2 * padding - WW) // stride + 1

    x_cols = im2col_indices(X, HH, WW, padding, stride)
    res = W.reshape((F, -1)).dot(x_cols) + b[:, np.newaxis]

    # Columns come back ordered (out_H, out_W, N); move the batch axis first.
    out = res.reshape((F, out_H, out_W, N)).transpose(3, 0, 1, 2)

    cache = (X, W, b, stride, padding, x_cols)
    return out, cache
def max_pool_backward_naive(dout, cache):
    """
    Backward pass for a max pooling layer, implemented with im2col/col2im.

    Inputs:
    - dout: Upstream derivatives, indexed as (N, C, out_height, out_width)
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x, with the same shape as x

    Raises AssertionError when the pooling window and stride do not tile
    the input exactly.
    """
    x, pool_param = cache
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # One column per pooling window, one window per (position, image, channel).
    x_split = x.reshape(N * C, 1, H, W)
    x_col = im2col.im2col_indices(x_split, field_height=pool_height,
                                  field_width=pool_width, padding=0,
                                  stride=stride)

    # Row index of the maximum inside every window.
    x_arg = np.argmax(x_col, axis=0)

    # Route each upstream gradient to the element that won its window.
    # np.arange replaces the Python-2-only xrange.
    dx_col = np.zeros((pool_height * pool_width, x_arg.size))
    dx_col[x_arg, np.arange(x_arg.size)] = \
        dout.transpose(2, 3, 0, 1).reshape(-1)

    dx = im2col.col2im_indices(dx_col, (N * C, 1, H, W), pool_height,
                               pool_width, padding=0, stride=stride)
    return dx.reshape(x.shape)
def dil(X, W, k, stride=1, padding=1):
    """
    Apply ``log(k * W_col @ exp(X_col)) / k`` to im2col columns of X.

    Inputs:
    - X: Tensor exposing get_shape(); dims 1 and 2 are used as the spatial
      sizes and dim 0 as the trailing size of the result before transposing.
    - W: Tensor exposing get_shape(); dims 0 and 1 are used as the window
      size, dim 2 as the leading size of the flattened weight matrix.
    - k: Scale applied inside the log and divided out afterwards.
    - stride, padding: Forwarded to im2col_indices.

    Returns:
    - out: Tensor reshaped to (W dim 0, h_out, w_out, X dim 0) and then
      transposed with perm [3, 0, 1, 2].

    NOTE(review): W dim 0 is used both as the window height and as the
    leading output dimension -- confirm the intended layouts of X and W.
    NOTE(review): `exp` is taken from the enclosing module; presumably the
    TensorFlow exponential -- confirm.
    """
    w_shape = W.get_shape()
    x_shape = X.get_shape()

    # Floor division keeps the sizes integral on Python 3; tf.reshape does
    # not accept float dimensions.
    h_out = (x_shape[1].value - w_shape[0].value + 2 * padding) // stride + 1
    w_out = (x_shape[2].value - w_shape[1].value + 2 * padding) // stride + 1

    X_col = im2col_indices(X, w_shape[0], w_shape[1],
                           padding=padding, stride=stride)
    W_col = tf.reshape(W, [w_shape[2].value, -1])

    out = tf.log(k * W_col @ exp(X_col)) / k
    out = tf.reshape(out, [w_shape[0].value, h_out, w_out, x_shape[0].value])
    out = tf.transpose(out, perm=[3, 0, 1, 2])
    return out
def _forward(self, X):
    """Forward pass of the convolution layer via im2col.

    Inputs:
    - X: Input batch, unpacked as (n_x, d_x, h_x, w_x).

    Returns:
    - out: Array of shape (n_x, n_filter, h_out, w_out).

    Side effects: stores X in ``self.X`` and ``self.cache`` and the im2col
    matrix in ``self.X_col``.
    """
    self.X = X
    W = self.W['val']
    b = self.b['val']
    n_filter, _, h_filter, w_filter = W.shape
    n_x, _, h_x, w_x = X.shape

    h_out = int((h_x - h_filter + 2 * self.pad) / self.S + 1)
    w_out = int((w_x - w_filter + 2 * self.pad) / self.S + 1)

    # Use the layer's own padding and stride so the column count agrees with
    # h_out / w_out above. They were hard-coded to 1, which only worked for
    # pad == S == 1.
    self.X_col = im2col.im2col_indices(X, h_filter, w_filter,
                                       padding=self.pad, stride=self.S)

    # e.g. 20 filters of 1x3x3 become a 20x9 matrix.
    W_col = W.reshape(n_filter, -1)

    # One bias per filter row, broadcast across all columns instead of
    # materialising a tiled (n_filter, n_columns) copy.
    out = W_col @ self.X_col + np.reshape(b, (-1, 1))

    # Columns are ordered (h_out, w_out, n_x); bring the batch axis first.
    out = out.reshape(n_filter, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)

    self.cache = X
    return out
def convFast(self, x, k, b, nonlinear_type, stride=1, pad=0):
    """Convolve one image with a filter bank and apply a nonlinearity.

    Inputs:
    - x: Single image with three axes, unpacked as (C, H, W) after a batch
      axis of size 1 is prepended.
    - k: Filters, unpacked as (num_filters, _, filter_height, filter_width).
    - b: Biases, reshaped to one value per filter row.
    - nonlinear_type: Passed through to ``self.nonlinear_unit``.
    - stride, pad: Forwarded to im2col_indices.

    Returns:
    - Result of ``self.nonlinear_unit`` applied to the squeezed
      (1, num_filters, out_height, out_width) response.

    Raises AssertionError when the filter, padding and stride do not tile
    the input exactly.
    """
    x = x[None, :, :, :]
    N, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = k.shape

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Floor division keeps the sizes integral on Python 3, where `/` returns
    # floats that cannot be used as array dimensions.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col.im2col_indices(x, filter_height, filter_width, pad, stride)
    res = k.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    out = res.reshape(num_filters, out_height, out_width, N)
    out = out.transpose(3, 0, 1, 2)
    return self.nonlinear_unit(np.squeeze(out), nonlinear_type)
def forward(self, x: np.ndarray) -> np.ndarray:
    """Max-pool the input with a square window of ``self.size``.

    The input is first reshaped to (x.shape[0], 1, 2 * x.shape[2], -1).

    Returns the pooled array of shape (n, 1, h_out, w_out).

    Raises Exception when window and stride do not tile the reshaped input.
    """
    x = x.reshape((x.shape[0], 1, 2 * x.shape[2], -1))
    n, d, h, w = x.shape

    h_out = (h - self.size) / self.stride + 1
    w_out = (w - self.size) / self.stride + 1
    if not (w_out.is_integer() and h_out.is_integer()):
        raise Exception("Invalid output dimension!")
    h_out = int(h_out)
    w_out = int(w_out)

    # One column per pooling window.
    columns = im2col_indices(x.reshape(n * d, 1, h, w), self.size, self.size,
                             padding=0, stride=self.stride)

    # Pick the winning element of every column.
    winners = np.argmax(columns, axis=0)
    pooled = columns[winners, range(winners.size)]

    return pooled.reshape(h_out, w_out, n, d).transpose(2, 3, 0, 1)
def execute(self, v):
    """Return the flattened sliding-window (im2col) view of ``v``.

    ``v`` is reshaped to a single (1, chans, idim, idim) image; windows are
    ``k`` x ``k`` with step ``s`` in both directions and no padding.
    """
    side = self.idim
    window = self.k
    step = self.s

    image = v.reshape((1, self.chans, side, side))
    columns = im2col_indices(image, window, window, padding=0,
                             stride_y=step, stride_x=step)
    return columns.flatten()
def conv_forward_naive(x, w, b, conv_param): """ 卷积层的前向运算 The input consists of N data points, each with C channels, height H and width W. We convolve each input with F different filters, where each filter spans all C channels and has height HH and width HH. Input: - x: Input data of shape (N, C, H, W) - w: Filter weights of shape (F, C, HH, WW) - b: Biases, of shape (F,) - conv_param: A dictionary with the following keys: - 'stride': The number of pixels between adjacent receptive fields in the horizontal and vertical directions. - 'pad': The number of pixels that will be used to zero-pad the input. Returns a tuple of: - out: Output data, of shape (N, F, H', W') where H' and W' are given by H' = 1 + (H + 2 * pad - HH) / stride W' = 1 + (W + 2 * pad - WW) / stride - cache: (x, w, b, conv_param) """ out = None N, C, H, W = x.shape F, _, HH, WW = w.shape stride, pad = conv_param['stride'], conv_param['pad'] # 维度检测 assert ( H + 2 * pad - HH) % stride == 0, 'width doesn\'t work with current paramter setting' assert ( W + 2 * pad - WW) % stride == 0, 'height doesn\'t work with current paramter setting' # 初始化输出 out_H = (H + 2 * pad - HH) / stride + 1 out_W = (W + 2 * pad - WW) / stride + 1 out = np.zeros((N, F, out_H, out_W), dtype=x.dtype) from im2col import im2col_indices x_cols = im2col_indices(x, HH, WW, padding=pad, stride=stride) res = w.reshape((w.shape[0], -1)).dot(x_cols) + b[:, np.newaxis] out = res.reshape((F, out_H, out_W, N)) out = out.transpose(3, 0, 1, 2) cache = (x, w, b, conv_param, x_cols) return out, cache
def forward(self, x):
    """Convolve a batch ``x`` with the layer's kernels via im2col.

    ``x`` is unpacked as (N, c, h, w). ``self.params['b']`` is added to the
    (kernel_number, columns) product and must broadcast against it.

    Returns an array of shape (N, kernel_number, y_height, y_width).
    """
    batch, _, height, width = x.shape
    out_h = (height + 2 * self.pad - self.kernel_h) // self.stride + 1
    out_w = (width + 2 * self.pad - self.kernel_w) // self.stride + 1

    columns = im2col.im2col_indices(x, self.kernel_h, self.kernel_w,
                                    self.pad, self.stride)
    kernels = self.params['w'].reshape(self.kernel_number, -1)
    bias = self.params['b']

    response = np.dot(kernels, columns) + bias
    response = response.reshape(self.kernel_number, out_h, out_w, batch)
    return response.transpose(3, 0, 1, 2)
def backward(self, x, dy):
    """Route the upstream gradient back to the pooled maxima.

    Inputs:
    - x: Layer input, unpacked as (N, c, h, w).
    - dy: Upstream gradient; transposed with (2, 3, 0, 1) and flattened.

    Uses the arg-max row indices stored in ``self.params['max_x']``.

    Returns the gradient with respect to x, with the same shape as x.
    """
    batch, channels, height, width = x.shape
    per_channel_shape = (batch * channels, 1, height, width)

    columns = im2col.im2col_indices(x.reshape(*per_channel_shape),
                                    self.kernel_h, self.kernel_w,
                                    self.pad, self.stride)

    winners = self.params['max_x']
    grad_columns = np.zeros_like(columns)
    # Only the element that won each window receives gradient.
    grad_columns[winners, range(winners.size)] = \
        dy.transpose(2, 3, 0, 1).ravel()

    grad_input = im2col.col2im_indices(grad_columns, per_channel_shape,
                                       self.kernel_h, self.kernel_w,
                                       self.pad, self.stride)
    return grad_input.reshape(x.shape)
def conv_forward_naive(x, w, b, conv_param): """ A naive implementation of the forward pass for a convolutional layer. The input consists of N data points, each with C channels, height H and width W. We convolve each input with F different filters, where each filter spans all C channels and has height HH and width HH. Input: - x: Input data of shape (N, C, H, W) - w: Filter weights of shape (F, C, HH, WW) - b: Biases, of shape (F,) - conv_param: A dictionary with the following keys: - 'stride': The number of pixels between adjacent receptive fields in the horizontal and vertical directions. - 'pad': The number of pixels that will be used to zero-pad the input. Returns a tuple of: - out: Output data, of shape (N, F, H', W') where H' and W' are given by H' = 1 + (H + 2 * pad - HH) / stride W' = 1 + (W + 2 * pad - WW) / stride - cache: (x, w, b, conv_param) """ out = None ############################################################################# # TODO: Implement the convolutional forward pass. # # Hint: you can use the function np.pad for padding. # ############################################################################# N, C, H, W = x.shape F, _, HH, WW = w.shape stride, pad = conv_param['stride'], conv_param['pad'] # Dimensionality check assert ( H + 2 * pad - HH) % stride == 0, 'width doesn\'t work with current paramter setting' assert ( W + 2 * pad - WW) % stride == 0, 'height doesn\'t work with current paramter setting' # Initialize output out_H = ( H + 2 * pad - HH) / stride + 1 out_W = ( W + 2 * pad - WW) / stride + 1 out = np.zeros( (N, F, out_H, out_W), dtype=x.dtype ) from im2col import im2col_indices x_cols = im2col_indices(x, HH, WW, padding=pad, stride=stride) res = w.reshape((w.shape[0], -1)).dot(x_cols) + b[:, np.newaxis] out = res.reshape((F, out_H, out_W, N)) out = out.transpose(3, 0, 1, 2) cache = (x, w, b, conv_param, x_cols) return out, cache
def forward(self, stimulus):
    """Convolve ``stimulus`` with the layer filters and apply the activation.

    Stores the input in ``self.stimulus`` and the result in
    ``self.activation``.

    Returns the activation, shaped
    (numInput, numFilters, outHeight, outWidth).
    """
    self.stimulus = stimulus
    stimulusCols = im2col_indices(self.stimulus, self.filterHeight,
                                  self.filterWidth, self.padding, self.stride)
    filterCols = self.filters.reshape((self.numFilters, -1))

    # One bias per filter row, broadcast over every column. Repeating the
    # bias outHeight * outWidth times only matched the column count when
    # numInput == 1, because im2col yields
    # outHeight * outWidth * numInput columns.
    biasCol = self.bias.reshape(self.numFilters, 1)

    activationCols = conv.activate(np.dot(filterCols, stimulusCols) + biasCol)
    self.activation = activationCols.reshape(
        self.numFilters, self.outHeight, self.outWidth,
        self.numInput).transpose(3, 0, 1, 2)
    return self.activation
def forward(self, x):
    """Max-pool a batch ``x`` unpacked as (N, c, h, w).

    Records the output size in ``self.params['y_height']`` /
    ``self.params['y_width']`` and the arg-max row of every window in
    ``self.params['max_x']``.

    Returns the pooled array of shape (N, c, y_height, y_width).
    """
    batch, channels, height, width = x.shape

    out_h = (height + 2 * self.pad - self.kernel_h) // self.stride + 1
    out_w = (width + 2 * self.pad - self.kernel_w) // self.stride + 1
    self.params['y_height'] = out_h
    self.params['y_width'] = out_w

    # Every channel becomes its own single-channel image so that one column
    # corresponds to one pooling window.
    per_channel = x.reshape(batch * channels, 1, height, width)
    columns = im2col.im2col_indices(per_channel, self.kernel_h, self.kernel_w,
                                    self.pad, self.stride)

    winners = np.argmax(columns, axis=0)
    self.params['max_x'] = winners

    pooled = columns[winners, range(winners.size)]
    return pooled.reshape(out_h, out_w, batch, channels).transpose(2, 3, 0, 1)
def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer, implemented with im2col.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param, x_cols)

    Raises AssertionError when the filter, padding and stride do not tile
    the input exactly.
    """
    N, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = w.shape
    stride, pad = conv_param["stride"], conv_param["pad"]

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, "width does not work"
    assert (H + 2 * pad - filter_height) % stride == 0, "height does not work"

    # Floor division keeps the sizes integral on Python 3, where `/` returns
    # floats that cannot be used as array dimensions.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col.im2col_indices(x, filter_height, filter_width, pad, stride)
    res = w.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    # Columns are ordered (out_height, out_width, N); batch axis goes first.
    out = res.reshape(num_filters, out_height, out_width, N)
    out = out.transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param, x_cols)
    return out, cache
def forward(self, x):
    """Convolve a batch ``x`` with the layer weights via im2col.

    Inputs:
    - x: Input batch, unpacked as (n, c_in, h_in, w_in). Stored in
      ``self.x``.

    Returns:
    - out: Array of shape (n, n_filters, h_out, w_out).
    """
    self.x = x
    n, _, h_in, w_in = x.shape
    n_filters, _, h_kernel, w_kernel = self._weight.shape

    # Output size of an undilated convolution:
    #   floor((in + 2 * padding - kernel) / stride) + 1
    # The previous expression multiplied (kernel - 1) by the stride (the
    # slot that belongs to dilation), which is only correct for stride == 1
    # and disagrees with the number of columns im2col produces otherwise.
    h_out = (h_in + 2 * self._padding - h_kernel) // self._stride + 1
    w_out = (w_in + 2 * self._padding - w_kernel) // self._stride + 1

    x_col = im2col_indices(x, h_kernel, w_kernel,
                           padding=self._padding, stride=self._stride)
    w_col = self._weight.reshape(n_filters, -1)

    out = w_col @ x_col + self._bias
    out = out.reshape(n_filters, h_out, w_out, n)
    out = out.transpose(3, 0, 1, 2)
    return out
def backward(self, x, dy):
    """Back-propagate through the convolution.

    Inputs:
    - x: The input that was fed to the forward pass.
    - dy: Upstream gradient, indexed as (N, kernel_number, y_height, y_width).

    Returns:
    - Gradient with respect to x, with the same shape as x.

    Side effects: stores the weight gradient in ``self.params['dw']``
    (shape (kernel_number, C, kernel_h, kernel_w)) and the bias gradient in
    ``self.params['db']``.
    """
    xcol = im2col.im2col_indices(x, self.kernel_h, self.kernel_w,
                                 self.pad, self.stride)
    dy_cols = dy.transpose(1, 2, 3, 0).reshape(self.kernel_number, -1)

    self.params['db'] = np.sum(dy_cols, axis=1)

    # dy_cols @ xcol.T is the gradient of the *flattened* weights, i.e. of
    # params['w'].reshape(kernel_number, -1) as used below. It must be
    # unflattened in that same element order. Reshaping to
    # (F, kernel_h, kernel_w, C) and transposing gave the right shape but
    # scrambled the entries whenever C > 1.
    self.params['dw'] = np.dot(dy_cols, xcol.T).reshape(
        self.kernel_number, -1, self.kernel_h, self.kernel_w)

    W_shaped = self.params['w'].reshape(self.kernel_number, -1)
    dx_cols = np.dot(dy_cols.T, W_shaped).T
    return im2col.col2im_indices(dx_cols, x.shape, self.kernel_h,
                                 self.kernel_w, self.pad, self.stride)
def backward(self, out_grad):
    """Back-propagate ``out_grad`` through the convolution.

    Inputs:
    - out_grad: Upstream gradient, indexed as (n, n_filters, h_out, w_out).

    Returns:
    - grad_x: Gradient with respect to the stored input ``self.x``.

    Side effects: stores ``self._grad_bias`` (reshaped to (n_filters, -1))
    and ``self._grad_weight`` (same shape as ``self._weight``).
    """
    n_filters, _, h_kernel, w_kernel = self._weight.shape

    self._grad_bias = np.sum(out_grad, axis=(0, 2, 3)).reshape(n_filters, -1)

    x_col = im2col_indices(self.x, h_kernel, w_kernel,
                           padding=self._padding, stride=self._stride)
    out_grad_reshaped = out_grad.transpose(1, 2, 3, 0).reshape(n_filters, -1)

    grad_weight = out_grad_reshaped @ x_col.T
    self._grad_weight = grad_weight.reshape(self._weight.shape)

    # The flattened weights are (n_filters, c*kh*kw); they must be transposed
    # to map gradients from filter space back to im2col column space.
    weight_reshaped = self._weight.reshape(n_filters, -1)
    grad_x_col = weight_reshaped.T @ out_grad_reshaped

    # Use the layer's own padding/stride; the bare names `padding` and
    # `stride` were undefined here.
    grad_x = col2im_indices(grad_x_col, self.x.shape, h_kernel, w_kernel,
                            padding=self._padding, stride=self._stride)
    return grad_x
def forward(self, X):
    """Convolve a channels-last batch with the layer weights.

    Inputs:
    - X: Input batch, unpacked as (N, W_in, H_in, D_in).

    Returns:
    - result: Array of shape (N, W_out, H_out, filters) with
      ``self.biases`` added.
    """
    N, W_in, H_in, _ = X.shape

    # Size each spatial axis separately so non-square inputs work; for
    # square inputs both values equal the previous single W_out.
    W_out = int((W_in - self.field + 2 * self.padding) / self.stride) + 1
    H_out = int((H_in - self.field + 2 * self.padding) / self.stride) + 1

    # The im2col helper is fed channels-first data.
    # TODO: Save this result
    X_pre = np.rollaxis(X, 3, 1)
    X_columnar = im2col_indices(X_pre, self.field, self.field,
                                self.padding, self.stride)

    result = np.dot(self.weights, X_columnar)  # [D_out X columns]
    result = np.reshape(result.T, (W_out, H_out, N, self.filters))
    result = np.rollaxis(result, 2)  # batch axis first
    result += self.biases
    return result
def backward(self, delta, alpha):
    """Back-propagate ``delta`` and take one gradient-descent step.

    Inputs:
    - delta: Upstream gradient, indexed like ``self.activation``.
    - alpha: Step size applied to the filter and bias updates.

    Returns the gradient with respect to ``self.stimulus``, computed with
    the filters as they were before this update.

    Side effects: updates ``self.filters`` and ``self.bias`` in place.
    """
    n_filters = self.numFilters
    stimulus_cols = im2col_indices(self.stimulus, self.filterHeight,
                                   self.filterWidth, self.padding, self.stride)
    filter_cols = self.filters.reshape((n_filters, -1))

    activation_cols = self.activation.transpose(1, 2, 3, 0)
    activation_cols = activation_cols.reshape(n_filters, -1)

    # Chain rule through the activation function.
    delta_cols = delta.transpose(1, 2, 3, 0).reshape(n_filters, -1)
    delta_cols = delta_cols * conv.aprime(activation_cols)

    # Input gradient first, while the filters are still unmodified.
    input_partial = col2im_indices(
        np.dot(delta_cols.T, filter_cols), self.stimulus.shape,
        self.filterHeight, self.filterWidth, self.padding, self.stride)

    filter_grad = np.dot(delta_cols, stimulus_cols.T)
    self.filters -= alpha * filter_grad.reshape(self.filters.shape)

    bias_grad = np.sum(delta_cols, axis=1)
    self.bias -= alpha * bias_grad.reshape(self.bias.shape)

    return input_partial
def execute(self, v):
    """Pool every channel of ``v`` and return the flattened result.

    ``v`` is reshaped to (chans, idim, idim). Each channel is split into
    ``k`` x ``k`` windows with step ``s`` and reduced with max ("MAX") or
    mean ("AVE") according to ``self.poolFxn``.

    Raises Exception for any other ``self.poolFxn`` value.
    """
    side = self.idim
    channels = v.reshape((self.chans, side, side))
    pooled = np.zeros((self.chans, self.odim * self.odim), dtype=np.float32)

    for index, channel in enumerate(channels):
        # Each sliding window becomes one column.
        windows = im2col_indices(channel.reshape((1, 1, side, side)),
                                 self.k, self.k, padding=0,
                                 stride_y=self.s, stride_x=self.s)
        # Reduce along the column to get one value per window.
        if self.poolFxn == "MAX":
            reduced = windows.max(axis=0)
        elif self.poolFxn == "AVE":
            reduced = windows.mean(axis=0)
        else:
            raise Exception("Unsupported pooling function")
        pooled[index] = reduced.flatten()

    return pooled.flatten()
def backward(self, x: np.ndarray, dy: np.ndarray) -> np.ndarray:
    """Back-propagate ``dy`` through the convolution.

    Inputs:
    - x: The input that was fed to the forward pass.
    - dy: Upstream gradient, indexed as (n, n_filter, h_out, w_out).

    Returns the gradient with respect to x, with the same shape as x.

    Side effects: stores ``self.dbias`` (reshaped to (n_filter, -1)) and
    ``self.dweight`` (same shape as ``self.weight``).
    """
    n_filter, _, h_filter, w_filter = self.weight.shape

    # Bias gradient: sum over every axis except the filter axis.
    self.dbias = np.sum(dy, axis=(0, 2, 3)).reshape(n_filter, -1)

    columns = im2col_indices(x, h_filter, w_filter,
                             padding=self.padding, stride=self.stride)
    grad_rows = dy.transpose(1, 2, 3, 0).reshape(n_filter, -1)

    self.dweight = (grad_rows @ columns.T).reshape(self.weight.shape)

    flat_weight = self.weight.reshape(n_filter, -1)
    grad_columns = flat_weight.T @ grad_rows
    return col2im_indices(grad_columns, x.shape, h_filter, w_filter,
                          padding=self.padding, stride=self.stride)
def forward(self, x: np.ndarray) -> np.ndarray:
    """Convolve a batch of single-channel images with the layer weights.

    Inputs:
    - x: Array with three axes; reshaped to
      (x.shape[0], 1, x.shape[1], x.shape[2]) before convolving.

    Returns:
    - out: Array of shape (n_x, n_filters, h_out, w_out).

    Raises Exception when filter size, padding and stride do not tile the
    input exactly.
    """
    n_filters, _, h_filter, w_filter = self.weight.shape
    x = x.reshape((x.shape[0], 1, x.shape[1], x.shape[2]))
    n_x, _, h_x, w_x = x.shape

    h_out = (h_x - h_filter + 2 * self.padding) / self.stride + 1
    w_out = (w_x - w_filter + 2 * self.padding) / self.stride + 1
    if not h_out.is_integer() or not w_out.is_integer():
        raise Exception("Invalid output dimension!")
    h_out, w_out = int(h_out), int(w_out)

    X_col = im2col_indices(x, h_filter, w_filter,
                           padding=self.padding, stride=self.stride)
    W_col = self.weight.reshape(n_filters, -1)

    out = W_col @ X_col
    # One bias per filter row, broadcast across all columns. This replaces a
    # Python list holding out.shape[1] copies of the bias vector.
    out += np.reshape(self.bias, (-1, 1))

    out = out.reshape(n_filters, h_out, w_out, n_x)
    out = out.transpose(3, 0, 1, 2)
    return out
def backward(self, x: np.ndarray, dy: np.ndarray) -> np.ndarray:
    """Route the upstream gradient back to the pooled maxima.

    Inputs:
    - x: The input that was fed to the forward pass; reshaped to
      (x.shape[0], 1, 2 * x.shape[2], -1) exactly as the forward pass does.
    - dy: Upstream gradient with four axes, indexed as
      (n, d, h_out, w_out).

    Returns:
    - dX: Gradient with the shape of the reshaped input.
    """
    x = x.reshape((x.shape[0], 1, 2 * x.shape[2], -1))
    # Height comes before width; unpacking them swapped handed col2im a
    # transposed image shape for non-square inputs.
    n, d, h, w = x.shape

    X_col = im2col_indices(x, self.size, self.size,
                           padding=0, stride=self.stride)

    # Recompute which row won each pooling window. The forward pass does not
    # keep its arg-max, and without this scatter the method returned an
    # all-zero gradient.
    pool_cache = np.argmax(X_col, axis=0)

    dX_col = np.zeros_like(X_col)
    dout_col = dy.transpose(2, 3, 0, 1).ravel()
    dX_col[pool_cache, np.arange(dout_col.size)] = dout_col

    dX = col2im_indices(dX_col, (n * d, 1, h, w), self.size, self.size,
                        padding=0, stride=self.stride)
    return dX.reshape(x.shape)
def convFast(self, x, k, b, nonlinear_type, stride=1, pad=0):
    """Convolve one image with a filter bank and apply a nonlinearity.

    Inputs:
    - x: Single image with three axes, unpacked as (C, H, W) after a batch
      axis of size 1 is prepended.
    - k: Filters, unpacked as (num_filters, _, filter_height, filter_width).
    - b: Biases, reshaped to one value per filter row.
    - nonlinear_type: Passed through to ``self.nonlinear_unit``.
    - stride, pad: Forwarded to im2col_indices.

    Returns:
    - Result of ``self.nonlinear_unit`` applied to the squeezed
      (1, num_filters, out_height, out_width) response.

    Raises AssertionError when the filter, padding and stride do not tile
    the input exactly.
    """
    x = x[None, :, :, :]
    batch, _, H, W = x.shape
    num_filters, _, filter_height, filter_width = k.shape

    # Check dimensions
    assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
    assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'

    # Integer output sizes: `/` yields floats on Python 3, which are not
    # valid array dimensions. The asserts above make floor division exact.
    out_height = (H + 2 * pad - filter_height) // stride + 1
    out_width = (W + 2 * pad - filter_width) // stride + 1

    x_cols = im2col.im2col_indices(x, filter_height, filter_width, pad, stride)
    res = k.reshape((num_filters, -1)).dot(x_cols) + b.reshape(-1, 1)

    out = res.reshape(num_filters, out_height, out_width, batch)
    out = out.transpose(3, 0, 1, 2)
    out = self.nonlinear_unit(np.squeeze(out), nonlinear_type)
    return out
def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer, implemented with im2col/col2im.

    Inputs:
    - dout: Upstream derivatives, of shape (N, F, H', W').
    - cache: A tuple starting with (x, w, b, conv_param). An optional fifth
      element is taken to be the im2col matrix of x and is reused instead
      of being recomputed.

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    # Accept both the documented 4-tuple and a 5-tuple that also carries
    # x_cols; unpacking exactly four values fails on the longer form.
    x, w, b, conv_param = cache[:4]
    pad, stride = conv_param['pad'], conv_param['stride']
    _, _, HH, WW = w.shape
    _, F, _, _ = dout.shape

    if len(cache) > 4:
        x_cols = cache[4]
    else:
        x_cols = im2col.im2col_indices(x, HH, WW, pad, stride)

    # Bias gradient: sum over every axis except the filter axis.
    db = np.sum(dout, axis=(0, 2, 3))

    # Arrange dout as (F, columns) in the same column order im2col uses.
    dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(F, -1)
    dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)

    dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
    dx = im2col.col2im_indices(dx_cols, x.shape, HH, WW, pad, stride)

    return dx, dw, db
#grad_1_part_3 = X[i] #grad_1_part_1_reshape = np.reshape(grad_1_part_1,(2,2)) #grad_1_temp_1 = grad_1_part_1_reshape * grad_1_part_2 #grad_1 = signal.convolve2d(grad_1_part_3, np.rot90(grad_1_temp_1,2),'valid') final_out, start_out = np.array([[]]), np.array([[]]) for it in range(num_epochs): for i, X_ in enumerate(X_train): layer_1 = signal.convolve2d(X_[0], w1, 'same') layer_1_act = tanh(layer_1) layer_1_axis1 = np.expand_dims(layer_1_act, axis=0) layer_1_act = np.expand_dims(layer_1_axis1, axis=1) layer_1_col = im2col_indices(layer_1_act, 2, 2, 0, 1) max_pool_layer_1 = np.argmax(layer_1_col, axis=0) layer_2 = max_pool_layer_1.dot(w2) layer_2_act = log(layer_2) cost = np.square(layer_2_act - Y[i]).sum() * 0.5 if i % 100 == 0: print("Current iteration", it, "current_train", i, "current_cost:",
# forward pass with transposed kernel n_filters = cd.convweights.shape[0] inp_reshaped = inp.detach().numpy().transpose(1, 2, 3, 0).reshape(n_filters, -1) W_reshape = dilated.reshape(n_filters, -1) out_col = W_reshape.T @ inp_reshaped # image is recovered through col2im out_image = col2im_indices(out_col, cd.inp.shape, dilated_size, dilated_size, padding=padding) is_eq(output, out_image) ################################################################################################ ### Backward Pass dout_col = im2col_indices(cd.convlossTrans.detach().numpy(), dilated_size, dilated_size, padding=padding) #dout_reshaped = cd.convlossTrans.detach().numpy().transpose(1, 2, 3, 0).reshape(n_filter, -1) dX_col = W_reshape @ dout_col c, _, h, w = inp.shape dX = dX_col.reshape(n_filters, h, w, c).transpose(3, 0, 1, 2) is_eq(dX, inp.grad) dW = dout_col @ inp_reshaped.T dW = dW.reshape(dilated.shape)
# Same convolution conv = nn.Conv2d(cd.inp.shape[1], out_channels, kernel_size, padding=padding, bias=True) conv.weight.data = cd.convweights #conv.bias.data = torch.zeros(out_channels) conv.bias.data = cd.bias output = conv(cd.inp) output.backward(cd.convloss if padding == 0 else cd.convlossPad1) print(f"output {to_cpp(output)}") print(f"dinput {to_cpp(cd.inp.grad)}") print (f"weights {to_cpp(conv.weight)}") print(f"dweights {to_cpp(conv.weight.grad)}") # now with im2col n_filter = cd.convweights.shape[0] x_col = im2col_indices(inp.detach().numpy(), kernel_size, kernel_size, padding=padding) W_reshape = cd.convweights.detach().numpy().reshape(n_filter, -1) dout_reshaped = cd.convloss.detach().numpy().transpose(1, 2, 3, 0).reshape(n_filter, -1) if(padding == 1): dout_reshaped = cd.convlossPad1.detach().numpy().transpose(1, 2, 3, 0).reshape(n_filter, -1) dX_col = W_reshape.T @ dout_reshaped dX = col2im_indices(dX_col, cd.inp.shape, kernel_size, kernel_size, padding=padding)