def max_pool_forward_im2col(x, pool_param):
    """Compute the max-pooling forward pass using im2col.

    This isn't much faster than the naive version, so it should be avoided
    if possible.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).
        pool_param: dict with the keys 'pool_height', 'pool_width' and
            'stride'.

    Returns:
        A tuple ``(out, cache)``. ``out`` has shape
        (N, C, out_height, out_width); ``cache`` is
        ``(x, x_cols, x_cols_argmax, pool_param)``.

    Raises:
        AssertionError: if the pooling window does not tile H or W exactly.
    """
    N, C, H, W = x.shape
    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']

    assert (H - pool_height) % stride == 0, 'Invalid height'
    assert (W - pool_width) % stride == 0, 'Invalid width'

    # Floor division keeps the sizes integral on Python 3; the asserts above
    # guarantee the division is exact, so the value is unchanged.
    out_height = (H - pool_height) // stride + 1
    out_width = (W - pool_width) // stride + 1

    # Treat every (sample, channel) pair as its own single-channel image so
    # that each im2col column holds exactly one pooling window.
    x_split = x.reshape(N * C, 1, H, W)
    x_cols = im2col(x_split, pool_height, pool_width, padding=0,
                    stride=stride)
    x_cols_argmax = np.argmax(x_cols, axis=0)
    x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
    out = x_cols_max.reshape(out_height, out_width, N, C).transpose(
        2, 3, 0, 1)

    cache = (x, x_cols, x_cols_argmax, pool_param)
    return out, cache
def get_part_info(self, image):
    """Collect index information for patches that only partly cover the image.

    The image is virtually padded by ``patchSize - 1`` pixels on every side;
    every patch of the padded grid that is not fully inside the real image
    contributes one entry.

    Args:
        image: 2-D array; only its shape is used.

    Returns:
        dict mapping a tuple of booleans (which patch pixels are observable)
        to a 2-D integer array whose rows are the flat indices, with respect
        to ``image``, of the observable pixels of one such patch.
    """
    side = int(np.sqrt(self.D))
    margin = side - 1
    H, W = image.shape
    full_h = H + margin * 2
    full_w = W + margin * 2

    # Each cell of the padded grid stores its own flat index, so im2col
    # yields, per patch, the padded-grid indices the patch covers.
    padded_ids = np.reshape(np.arange(full_h * full_w), (full_h, full_w))
    patch_ids = im2col(padded_ids, side).T

    PgnPart = dict()
    for n in xrange(patch_ids.shape[0]):
        rows, cols = np.unravel_index(patch_ids[n], (full_h, full_w))
        rows_inside = np.logical_and(rows >= margin, rows <= full_h - side)
        cols_inside = np.logical_and(cols >= margin, cols <= full_w - side)
        visible = np.logical_and(rows_inside, cols_inside)
        if np.all(visible):
            # Fully observable patch: not recorded here.
            continue

        # Shift the surviving coordinates back into the unpadded image.
        img_rows = rows[visible] - margin
        img_cols = cols[visible] - margin
        flat = np.ravel_multi_index(np.array([img_rows, img_cols]), (H, W))

        key = tuple(visible)
        if key in PgnPart:
            PgnPart[key] = np.vstack((PgnPart[key], flat))
        else:
            PgnPart[key] = np.array([flat])
    return PgnPart
def _initU(self, y):
    """Initialise the per-patch means from image ``y``.

    Returns:
        A tuple ``(u, uPart)``: ``u`` is the mean over axis 0 of the im2col
        patch matrix of ``y``; ``uPart`` maps every mask in ``self.PgnPart``
        to the row-wise mean of the pixels of ``y`` selected by that mask's
        index array.
    """
    side = int(np.sqrt(self.D))
    u = np.mean(im2col(y, side), axis=0)
    uPart = {
        mask: np.mean(y.ravel()[idx], axis=1)
        for mask, idx in self.PgnPart.items()
    }
    return u, uPart
def forward(self, x, train=True):
    """Average-pool ``x`` over ``fh`` x ``fw`` windows.

    Args:
        x: 4-D array whose shape unpacks as (n, c, h, w).
        train: accepted but not used by this method.

    Returns:
        Array of shape (n, c, out_h, out_w) holding the window means.
    """
    batch, channels, height, width = x.shape
    self.batch_size = batch

    out_h, out_w = filter_out_size(height, width, self.fh, self.fw,
                                   self.stride_h, self.stride_w,
                                   self.padding)

    windows = im2col(x, self.fh, self.fw, self.stride_h, self.stride_w,
                     self.padding)
    # One row per (position, channel) window.
    windows = windows.reshape(-1, self.fh * self.fw)

    pooled = np.mean(windows, axis=1)
    return pooled.reshape(batch, out_h, out_w, channels).transpose(
        0, 3, 1, 2)
def forward(self, x):
    """Max-pool ``x`` over ``pool_h`` x ``pool_w`` windows.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, C, out_h, out_w) holding the window maxima.
    """
    batch, channels, height, width = x.shape
    out_h = int(1 + (height - self.pool_h) / self.stride)
    out_w = int(1 + (width - self.pool_w) / self.stride)

    windows = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
    # One row per (position, channel) window.
    windows = windows.reshape(-1, self.pool_h * self.pool_w)

    pooled = np.max(windows, axis=1)
    return pooled.reshape(batch, out_h, out_w, channels).transpose(
        0, 3, 1, 2)
def forward(self, x):
    """Convolve ``x`` with the layer's filters using im2col.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, FN, out_h, out_w), where FN is the first
        dimension of ``self.W``.
    """
    # self.W is reshaped to (FN, -1) and its height/width are both needed,
    # so all four dimensions must be unpacked (FW was previously missing).
    FN, C, FH, FW = self.W.shape
    N, C, H, W = x.shape
    out_h = int(1 + (H + 2 * self.pad - FH) / self.stride)
    out_w = int(1 + (W + 2 * self.pad - FW) / self.stride)

    col = im2col(x, FH, FW, self.stride, self.pad)
    col_W = self.W.reshape(FN, -1).T

    # Matrix product of the unrolled input with the unrolled filters; the
    # original call passed a single, undefined operand.
    out = np.dot(col, col_W) + self.b
    out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
    return out
def forward(self, x, train=True):
    """Convolve ``x`` with ``self.w`` and cache the intermediates.

    Args:
        x: 4-D array whose shape unpacks as (n, _, h, w).
        train: accepted but not used by this method.

    Returns:
        Array of shape (n, oc, out_h, out_w). ``self.x``, ``self.col`` and
        ``self.col_w`` are set as a side effect.
    """
    n_filters, _, kernel_h, kernel_w = self.w.shape
    batch, _, height, width = x.shape

    out_h, out_w = filter_out_size(height, width, kernel_h, kernel_w,
                                   self.stride_h, self.stride_w,
                                   self.padding)

    unrolled_x = im2col(x, kernel_h, kernel_w, self.stride_h, self.stride_w,
                        self.padding)
    unrolled_w = self.w.reshape(n_filters, -1).T

    response = np.dot(unrolled_x, unrolled_w) + self.b
    response = response.reshape(batch, out_h, out_w, -1).transpose(
        0, 3, 1, 2)

    # Cached on the instance after the forward computation succeeded.
    self.x = x
    self.col = unrolled_x
    self.col_w = unrolled_w
    return response
def forward(self, din):
    """Convolve ``din`` with ``self.w`` and add the bias.

    Side effects: stores the input shape in ``self.din_shape`` and the
    im2col-unrolled input in ``self.din``.

    Args:
        din: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, nf, oh, ow).
    """
    # Remember the input shape before anything else.
    self.din_shape = din.shape

    batch, _, height, width = din.shape
    out_rows = (height + 2 * self.pad - self.fh) // self.stride + 1
    out_cols = (width + 2 * self.pad - self.fw) // self.stride + 1

    # Note: self.din holds the unrolled matrix, not the raw input.
    self.din = im2col(din, self.fh, self.fw, self.stride, self.pad)
    weights_2d = self.w.reshape(self.nf, -1).transpose()

    response = np.dot(self.din, weights_2d)
    response += self.b  # in-place add, as in the original

    response = response.reshape(batch, out_rows, out_cols, self.nf)
    return response.transpose(0, 3, 1, 2)
def forward(self, din):
    """Max-pool ``din`` over ``fh`` x ``fw`` windows.

    Side effects: stores the original input shape in ``self.din_shape`` and
    the per-window argmax in ``self.mask``.

    Args:
        din: array of shape (N, C, H, W), or (C, H, W) for a single
            un-batched sample.

    Returns:
        Array of shape (N, C, oh, ow). For a 3-D input N is 1, i.e. the
        batch axis is kept in the result.
    """
    self.din_shape = din.shape
    if len(self.din_shape) == 4:
        N, C, H, W = self.din_shape
    else:
        # Previously N was left unbound on this path and the final reshape
        # raised NameError. Treat the input as a batch of one.
        C, H, W = self.din_shape
        N = 1
        din = din.reshape(N, C, H, W)

    oh = int(1 + (H - self.fh) / self.stride)
    ow = int(1 + (W - self.fw) / self.stride)

    imcol = im2col(din, self.fh, self.fw, self.stride, self.pad)
    # Reshape to (N*oh*ow*C, fh*fw): one row per pooling window.
    imcol = imcol.reshape(-1, self.fh * self.fw)

    self.mask = np.argmax(imcol, axis=1)
    dout = imcol.max(axis=1)
    return dout.reshape(N, oh, ow, C).transpose(0, 3, 1, 2)
def forward(self, x):
    """Convolve ``x`` with ``self.W``; output size comes from ``output_size``.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, FN, out_h, out_w). ``self.x``, ``self.col`` and
        ``self.col_W`` are set as a side effect.
    """
    n_filters, _, kernel_h, kernel_w = self.W.shape
    batch, channels, height, width = x.shape
    _, out_h, out_w = self.output_size((channels, height, width))

    unrolled_x = im2col(x, kernel_h, kernel_w, self.stride, self.pad)
    unrolled_w = self.W.reshape(n_filters, -1).T

    response = np.dot(unrolled_x, unrolled_w) + self.b
    response = response.reshape(batch, out_h, out_w, -1).transpose(
        0, 3, 1, 2)

    self.x = x
    self.col = unrolled_x
    self.col_W = unrolled_w
    return response
def update_v(self, beta, x, u, uPart, resp, respPart, patchLst=None, IP=None):
    """Update the per-patch variables ``v`` given cluster assignments.

    Args:
        beta: scalar passed to ``calcIterationParams`` and used to scale the
            factor handed to ``cho_solve``.
        x: image; patches are extracted with ``im2col`` and it is also
            indexed through ``x.ravel()``.
        u: subtracted from the patch matrix (fully observable patches).
        uPart: dict keyed by mask, subtracted from the partial patches.
        resp: per-patch cluster index for the fully observable patches.
        respPart: dict keyed by mask with the cluster index per partial patch.
        patchLst: optional column selector applied to the patch matrix.
        IP: optional precomputed iteration parameters; computed from ``beta``
            when ``None``.

    Returns:
        ``(v, vPart)``: ``v`` has shape (NFull, D); ``vPart`` maps each mask
        in ``self.PgnPart`` to an array of shape (NPart, D).
    """
    # fully observable patches
    if IP is None:
        IP = self.calcIterationParams(beta)
    D, K, GP, patchSize = self.D, self.K, self.GP, int(np.sqrt(self.D))
    Px_minus_u = im2col(x, patchSize) - u
    if patchLst is not None:
        Px_minus_u = Px_minus_u[:, patchLst]
    NFull = Px_minus_u.shape[1]
    v = np.zeros((NFull, D))
    for k in xrange(K):
        # Patches currently assigned to cluster k; skip empty clusters.
        idx_k = np.flatnonzero(resp == k)
        if len(idx_k) == 0:
            continue
        # (factor, lower-flag) pair in the form cho_solve expects.
        # NOTE(review): presumably scipy.linalg.cho_solve -- confirm import.
        cho = (IP.Rc[k] * beta, bool(IP.Rlower[k]))
        # The fancy-indexed right-hand side is a fresh copy, so
        # overwrite_b=True does not touch Px_minus_u itself.
        v[idx_k] = cho_solve(cho,
                             Px_minus_u[:, idx_k],
                             overwrite_b=True,
                             check_finite=False).T
    # partially observable patches
    vPart = dict()
    for mask, idx in self.PgnPart.items():
        maskLst = np.array(list(mask), dtype=bool)
        # Iteration parameters are recomputed for every distinct mask.
        IPPart = self.calcIterationParams(beta, mask=maskLst)
        NPart = len(uPart[mask])
        # Rows for unobserved pixels stay zero; observed rows get x - u.
        CT_Px_minus_u = np.zeros((D, NPart))
        CT_Px_minus_u[maskLst, :] = x.ravel()[idx].T - uPart[mask]
        this_v = np.zeros((NPart, D))
        for k in xrange(K):
            idx_k = np.flatnonzero(respPart[mask] == k)
            if len(idx_k) == 0:
                continue
            cho = (IPPart.Rc[k] * beta, bool(IPPart.Rlower[k]))
            this_v[idx_k] = cho_solve(cho,
                                      CT_Px_minus_u[:, idx_k],
                                      overwrite_b=True,
                                      check_finite=False).T
        vPart[mask] = this_v
    return v, vPart
def forward(self, x):
    """Convolve ``x`` with ``self.W`` (no bias term is added).

    Side effects: sets ``self.x``, ``self.x_col``, ``self.W_col``,
    ``self.u_col`` and ``self.u``.

    Args:
        x: 4-D array whose shape unpacks as
            (batchsize, channel, height, width).

    Returns:
        ``self.u``, of shape (batchsize, filternum, conv_out_h, conv_out_w).
    """
    n_filters, _, kernel_h, kernel_w = self.W.shape
    batch, _, in_h, in_w = x.shape

    pad, stride = self.conv_pad, self.conv_stride
    rows_out = 1 + int((in_h + 2*pad - kernel_h) / stride)
    cols_out = 1 + int((in_w + 2*pad - kernel_w) / stride)

    self.x = x
    self.x_col = im2col(x, kernel_h, kernel_w, self.conv_stride,
                        self.conv_pad)
    self.W_col = self.W.reshape(n_filters, -1).T
    self.u_col = np.dot(self.x_col, self.W_col)

    # Back from (positions, filters) rows to an image-shaped tensor.
    self.u = self.u_col.reshape(batch, rows_out, cols_out, -1).transpose(
        0, 3, 1, 2)
    return self.u
def forward(self, x):
    """Convolution forward pass via im2col.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, FN, out_h, out_w). ``self.x``, ``self.col`` and
        ``self.col_W`` are set as a side effect.
    """
    n_filters, _, kernel_h, kernel_w = self.W.shape
    batch, _, in_h, in_w = x.shape

    rows_out = 1 + int((in_h + 2 * self.pad - kernel_h) / self.stride)
    cols_out = 1 + int((in_w + 2 * self.pad - kernel_w) / self.stride)

    patches = im2col(x, kernel_h, kernel_w, self.stride, self.pad)
    kernels = self.W.reshape(n_filters, -1).T

    result = np.dot(patches, kernels) + self.b
    result = result.reshape(batch, rows_out, cols_out, -1)
    result = result.transpose(0, 3, 1, 2)

    self.x = x
    self.col = patches
    self.col_W = kernels
    return result
def forward(self, x):
    """Max-pooling forward pass; remembers the argmax of every window.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, C, out_h, out_w). ``self.x`` and ``self.arg_max``
        are set as a side effect.
    """
    batch, channels, in_h, in_w = x.shape
    out_h = 1 + int((in_h - self.pool_h) / self.stride)
    out_w = 1 + int((in_w - self.pool_w) / self.stride)

    window_size = self.pool_h * self.pool_w
    windows = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
    # One row per pooling window: (N * OH * OW * C, PH * PW).
    windows = windows.reshape(-1, window_size)

    winners = np.argmax(windows, axis=1)  # 1-D index of the max per window
    pooled = np.max(windows, axis=1)
    pooled = pooled.reshape(batch, out_h, out_w, channels).transpose(
        0, 3, 1, 2)

    self.x = x
    self.arg_max = winners
    return pooled
def forward(self, x):
    """Convolution forward pass using the weights held in ``self.params``.

    ``self.params`` is unpacked as ``(weights, bias)``.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, FN, out_h, out_w). ``self.x``, ``self.col`` and
        ``self.col_W`` are set as a side effect.
    """
    weights, bias = self.params
    n_filters, _, kernel_h, kernel_w = self.params[0].shape
    batch, _, in_h, in_w = x.shape

    out_h = int(1 + (in_h + 2 * self.pad - kernel_h) / self.stride)
    out_w = int(1 + (in_w + 2 * self.pad - kernel_w) / self.stride)

    patches = im2col(x, kernel_h, kernel_w, self.stride, self.pad)
    kernels = weights.reshape(n_filters, -1).T

    result = np.dot(patches, kernels) + bias
    result = result.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

    self.x = x
    self.col = patches
    self.col_W = kernels
    return result
def forward(self, x):
    """Max-pooling forward pass; keeps the argmax for the backward pass.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, C, out_h, out_w). ``self.x`` and ``self.arg_max``
        are set as a side effect.
    """
    batch, channels, in_h, in_w = x.shape
    out_h = int(1 + (in_h - self.pool_h) / self.stride)
    out_w = int(1 + (in_w - self.pool_w) / self.stride)

    # Unroll the input so that each row is one pooling window.
    windows = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
    windows = windows.reshape(-1, self.pool_h * self.pool_w)

    winners = np.argmax(windows, axis=1)
    # Max pooling.
    pooled = np.max(windows, axis=1)
    pooled = pooled.reshape(batch, out_h, out_w, channels).transpose(
        0, 3, 1, 2)

    self.x = x
    self.arg_max = winners
    return pooled
def forward(self, x):
    """Convolution forward pass via im2col.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, FN, out_h, out_w). ``self.x``, ``self.col`` and
        ``self.col_W`` are set as a side effect.
    """
    # Filter dimensions, unpacked in axis order.
    FN, C, FH, FW = self.W.shape
    # Input dimensions, unpacked in axis order.
    N, C, H, W = x.shape

    out_h = int((H + 2 * self.pad - FH) / self.stride + 1)
    out_w = int((W + 2 * self.pad - FW) / self.stride + 1)

    col = im2col(x, FH, FW, self.stride, self.pad)
    # Unroll the filters into a (C*FH*FW, FN) matrix.
    col_W = self.W.reshape(FN, -1).T

    out = np.dot(col, col_W) + self.b
    out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

    # Kept for the backward pass. The original stored an undefined name
    # here (the local was spelled ``col_w``), raising NameError.
    self.x = x
    self.col = col
    self.col_W = col_W
    return out
def update_u(self, beta, x, v, vPart, patchLst=None):
    """Update the per-patch offsets ``u`` given the current ``v``.

    Args:
        beta: scalar; ``1 / beta**2`` weights the data term.
        x: image; patches come from ``im2col`` and from ``x.ravel()``.
        v: array subtracted from the transposed patch matrix.
        vPart: dict keyed by mask with the ``v`` values of partial patches.
        patchLst: optional column selector applied to the patch matrix.

    Returns:
        ``(u, uPart)``: the update for the fully observable patches and a
        dict keyed by mask for the partially observable ones.
    """
    D, GP = self.D, self.GP
    side = int(np.sqrt(D))
    inv_beta_sq = 1.0 / beta**2

    # --- fully observable patches ---
    full_patches = im2col(x, side)
    if patchLst is not None:
        full_patches = full_patches[:, patchLst]
    residual = full_patches.T - v
    scale = 1.0 / (1.0 / GP.s2 + D * inv_beta_sq)
    u = scale * (GP.r / GP.s2 + inv_beta_sq * np.sum(residual, axis=1))

    # --- partially observable patches ---
    uPart = dict()
    for mask, idx in self.PgnPart.items():
        observed = np.array(list(mask), dtype=bool)
        # The number of observed pixels replaces D in the scale factor.
        _, n_observed = idx.shape
        scale = 1.0 / (1.0 / GP.s2 + n_observed * inv_beta_sq)
        residual = x.ravel()[idx] - vPart[mask][:, observed]
        uPart[mask] = scale * (
            GP.r / GP.s2 + inv_beta_sq * np.sum(residual, axis=1))
    return u, uPart
def forward(self, x, train_flg=False):
    """Convolve, sum over the last axis, activate, then take a row-wise max.

    The convolution uses ``self.T`` as stride and zero padding. Many
    intermediates are stored on the instance (``u_col``, ``u``, ``x``,
    ``x_col``, ``W_col``, ``u_sum``, ``conv_y``, ``arg_max``, ``pool_y``,
    ``y``).

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).
        train_flg: accepted but not used by this method.

    Returns:
        The maximum of ``self.conv_y`` along axis 1.
    """
    # Convolution
    FN, C, FH, FW = self.W.shape
    N, C, H, W = x.shape
    if self.first_flg:
        # One-time, in-place rescaling of the weights on the first call.
        self.W /= np.sqrt(self.T / self.win_size)
        self.first_flg = False
    out_h = 1 + int((H - FH)/self.T)
    out_w = 1 + int((W - FW)/self.T)
    col = im2col(x, FH, FW, self.T, 0)
    col_W = self.W.reshape(FN, -1).T
    self.u_col = np.dot(col, col_W) + self.b
    self.u = self.u_col.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
    self.x = x
    self.x_col = col
    self.W_col = col_W
    # Sum over the last axis and flatten to (N, self.N); the reshape requires
    # FN * out_h == self.N.
    self.u_sum = np.sum(self.u, axis=3).reshape((N,self.N))
    if self.activation == 'tanh':
        self.conv_y = np.tanh(self.u_sum)
    elif self.activation == 'Relu':
        self.mask = (self.u_sum <= 0)
        # conv_y aliases u_sum, so the zeroing below also modifies u_sum.
        self.conv_y = self.u_sum
        self.conv_y[self.mask] = 0
    elif self.activation == 'None':
        self.conv_y = self.u_sum
    else:
        # Only a message is printed; self.conv_y is not assigned on this
        # path, so the lines below use whatever value it had before (if any).
        print('Error : Activation function ' + self.activation +
              ' is undefined.')
    self.arg_max = np.argmax(self.conv_y, axis=1)
    y = np.max(self.conv_y, axis=1)
    self.pool_y = y
    # Note: self.y stores the pre-activation tensor u, not the returned y.
    self.y = self.u
    return y
def forward(self, x):
    """Convolution forward pass via im2col.

    Shapes:
        input:  (batch_num, channel, height, width)
        filter: (filter_num, channel, filter_height, filter_width)
        output: (batch_num, filter_num, output_height, output_width)

    ``self.x``, ``self.col`` and ``self.col_W`` are set as a side effect.
    """
    batch, _, in_h, in_w = x.shape
    n_filters, _, kernel_h, kernel_w = self.W.shape

    out_h = 1 + int((in_h + 2*self.pad - kernel_h) / self.stride)
    out_w = 1 + int((in_w + 2*self.pad - kernel_w) / self.stride)

    # (N * OH * OW, C * FH * FW)
    patches = im2col(x, kernel_h, kernel_w, self.stride, self.pad)
    # (FN, C * FH * FW) -> (C * FH * FW, FN)
    kernels = self.W.reshape(n_filters, -1).T

    # (N * OH * OW, FN)
    result = np.dot(patches, kernels) + self.b
    # (N, FN, OH, OW)
    result = result.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

    self.x = x
    self.col = patches
    self.col_W = kernels
    return result
def forward(self, x):
    """Max-pooling forward pass with optional flattening of the output.

    Side effects: sets ``self.x``, ``self.arg_max``, ``self.out_h``,
    ``self.out_w`` and ``self.C``.

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).

    Returns:
        Array of shape (N, C, out_h, out_w), or (N, -1) when
        ``self.flatten`` is truthy.
    """
    batch, channels, in_h, in_w = x.shape
    rows_out = int(1 + (in_h + 2 * self.pad - self.pool_h) / self.stride)
    cols_out = int(1 + (in_w + 2 * self.pad - self.pool_w) / self.stride)

    windows = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
    # (N * out_h * out_w, C * pool_h * pool_w)
    #   -> (N * out_h * out_w * C, pool_h * pool_w)
    windows = windows.reshape(-1, self.pool_h * self.pool_w)

    winners = np.argmax(windows, axis=1)
    pooled = np.max(windows, axis=1)
    pooled = pooled.reshape(batch, rows_out, cols_out, channels).transpose(
        0, 3, 1, 2)
    if self.flatten:
        pooled = pooled.reshape(batch, -1)

    self.x = x
    self.arg_max = winners
    self.out_h = rows_out
    self.out_w = cols_out
    self.C = channels
    return pooled
def update_z(self, beta, logPi, x, u, uPart, patchLst=None, IP=None):
    """Assign every patch to the cluster with the highest score.

    Args:
        beta: scalar passed to ``calcIterationParams``; ``beta**2`` scales
            the triangular factor.
        logPi: per-cluster term added to every patch's score.
        x: image; patches come from ``im2col`` and from ``x.ravel()``.
        u: subtracted from the patch matrix (fully observable patches).
        uPart: dict keyed by mask, subtracted from the partial patches.
        patchLst: optional column selector applied to the patch matrix.
        IP: optional precomputed iteration parameters; computed from ``beta``
            when ``None``.

    Returns:
        ``(resp, respPart)``: argmax cluster index per fully observable
        patch, and a dict keyed by mask with the same for partial patches.
    """
    # fully observable patches
    if IP is None:
        IP = self.calcIterationParams(beta)
    D, K, GP, patchSize = self.D, self.K, self.GP, int(np.sqrt(self.D))
    Px_minus_u = im2col(x, patchSize) - u
    if patchLst is not None:
        Px_minus_u = Px_minus_u[:, patchLst]
    NFull = Px_minus_u.shape[1]
    # Start every patch's score row from the patch-independent terms.
    resp = np.tile(logPi + 0.5 * (IP.logdetSigma + GP.logdetLam), (NFull, 1))
    for k in xrange(K):
        tmp = solve_triangular(beta**2 * IP.Rc[k],
                               Px_minus_u,
                               lower=IP.Rlower[k],
                               check_finite=False)
        # Column-wise squared norm of the solved system, one value per patch.
        resp[:, k] += .5 * np.einsum('dn,dn->n', tmp, tmp)
    # Hard assignment: keep only the index of the best cluster.
    resp = np.argmax(resp, axis=1)
    # partially observable patches
    respPart = dict()
    for mask, idx in self.PgnPart.items():
        maskLst = np.array(list(mask), dtype=bool)
        # Iteration parameters are recomputed for every distinct mask.
        IPPart = self.calcIterationParams(beta, mask=maskLst)
        NPart = idx.shape[0]
        # Rows for unobserved pixels stay zero; observed rows get x - u.
        CT_Px_minus_u = np.zeros((D, NPart))
        CT_Px_minus_u[maskLst, :] = x.ravel()[idx].T - uPart[mask]
        this_resp = np.tile(
            logPi + 0.5 * (IPPart.logdetSigma + GP.logdetLam), (NPart, 1))
        for k in xrange(K):
            tmp = solve_triangular(beta**2 * IPPart.Rc[k],
                                   CT_Px_minus_u,
                                   lower=IPPart.Rlower[k],
                                   check_finite=False)
            this_resp[:, k] += .5 * np.einsum('dn,dn->n', tmp, tmp)
        respPart[mask] = np.argmax(this_resp, axis=1)
    return resp, respPart
def train_image_specific_topics(self,
                                y,
                                sigma,
                                Niter=50,
                                Kfresh=100,
                                pixelMask=None):
    """Fit image-specific clusters to the patches of ``y`` and merge them.

    The patches are clustered with Bregman k-means, converted to sufficient
    statistics, combined with the stored training statistics, and the model's
    global parameters are updated in place.

    Args:
        y: image; 2-D when ``pixelMask`` is None, otherwise indexed as
            ``y[:, :, c]`` for three channels.
        sigma: noise level used in the hyper-parameters and in the
            covariance correction.
        Niter: number of k-means iterations.
        Kfresh: requested number of new clusters (the number actually used
            is derived from the k-means labels).
        pixelMask: optional boolean mask; patches containing any masked
            pixel are excluded.

    Returns:
        None. Updates ``self.patchModel`` and calls ``self.calcGlobalParams``.
    """
    print('Training %d image-specific clusters...' % Kfresh)
    D, patchSize, GP = self.D, int(np.sqrt(self.D)), self.GP

    # gather fully observable patches
    if pixelMask is None:
        # gray-scale image denoising
        v = im2col(y, patchSize)
    else:
        # color image inpainting
        C = 3
        patchMask = np.logical_not(
            np.any(im2col(pixelMask, patchSize), axis=0))
        v = np.hstack(
            tuple([
                im2col(y[:, :, c], patchSize)[:, patchMask]
                for c in xrange(C)
            ]))
    # Remove each patch's mean, then switch to one patch per row.
    v -= np.mean(v, axis=0)
    v = v.T
    testData = GroupXData(X=v, doc_range=[0, len(v)], nDocTotal=1)
    testData.name = 'test_image_patches'

    # set up hyper-parameters and run Bregman k-means
    cached_B_name = 'models/HDP/B.mat'
    # Read the file once instead of once per field.
    cached_B = loadmat(cached_B_name)
    xBar = cached_B['Cov']
    xBar2 = cached_B['Cov2']
    tmp0 = (np.diag(xBar) + sigma**2)**2
    tmp1 = np.diag(xBar2) + 6 * np.diag(xBar) * sigma**2 + 3 * sigma**4
    nu = D + 3 + 2 * np.sum(tmp0) / np.sum(tmp1 - tmp0)
    B = (nu - D - 1) * (xBar + sigma**2 * np.eye(D))
    obsModel = ZeroMeanGaussObsModel(D=D,
                                     min_covar=1e-8,
                                     inferType='memoVB',
                                     B=B,
                                     nu=nu)
    Z, Mu, Lscores = runKMeans_BregmanDiv(testData.X,
                                          Kfresh,
                                          obsModel,
                                          Niter=Niter,
                                          assert_monotonic=False)
    # New clusters are numbered after the existing ones.
    Korig = self.K
    Kall = np.max(Z) + Korig + 1
    Kfresh = Kall - Korig
    Z += Korig

    # load SuffStats of training images
    trainSS = loadSuffStatBag('models/HDP/SS.dump')
    trainSS.insertEmptyComps(Kfresh)

    # construct SuffStats of the test image
    DocTopicCount = np.bincount(Z, minlength=int(Kall)).reshape((1, Kall))
    DocTopicCount = np.array(DocTopicCount, dtype=np.float64)
    # One-hot responsibilities from the hard k-means labels.
    resp = np.zeros((len(Z), Kall))
    resp[np.arange(len(Z)), Z] = 1.0
    testLP = dict(resp=resp, DocTopicCount=DocTopicCount)
    alphaPi0 = np.hstack(
        (GP.alphaPi0, GP.alphaPi0Rem / (Kfresh + 1) * np.ones(Kfresh)))
    alphaPi0Rem = GP.alphaPi0Rem / (Kfresh + 1)
    testLP = updateLPGivenDocTopicCount(testLP, DocTopicCount, alphaPi0,
                                        alphaPi0Rem)
    testSS = self.patchModel.get_global_suff_stats(
        testData, testLP, doPrecompEntropy=1, doTrackTruncationGrowth=1)

    xxT = np.zeros((Kall, D, D))
    for k in xrange(Korig, Kall):
        idx = Z == k
        tmp = np.einsum('nd,ne->de', v[idx], v[idx])
        tmp -= testSS.N[k] * sigma**2 * np.eye(D)
        # tmp is symmetric, so use eigh: it guarantees real eigenvalues and
        # orthonormal eigenvectors, which the vec * diag * vec.T
        # reconstruction below relies on (general eig guarantees neither).
        val, vec = np.linalg.eigh(tmp)
        # Clamp small/negative eigenvalues to keep the matrix positive.
        val[val < EPS] = EPS
        xxT[k] = np.dot(vec, np.dot(np.diag(val), vec.T))
    testSS.setField('xxT', xxT, dims=('K', 'D', 'D'))
    testSS.setUIDs(trainSS.uids)

    # combine training and test SS; update model parameters
    combinedSS = trainSS + testSS
    self.patchModel.update_global_params(combinedSS)
    self.calcGlobalParams()
def inpaint(self, y, pixelMask, T=20, **kwargs):
    """Inpaint the masked pixels of a three-channel image.

    Args:
        y: 3-D image array, processed channel by channel as ``y[:, :, c]``.
        pixelMask: boolean mask of the pixels to fill; it must not touch the
            image border region checked below.
        T: number of inner iterations per beta value.
        **kwargs: forwarded to ``train_image_specific_topics``.

    Returns:
        The result of ``ycbcr2rgb`` applied to the clipped, inpainted image.

    Raises:
        ValueError: if ``pixelMask`` marks boundary pixels.
        TypeError: if ``y`` is not 3-D.
    """
    self.print_inpainting_info(y)
    betas = 1.0 / np.sqrt(10 * np.array([1, 2, 16, 128, 512]))
    D, patchSize = self.D, int(np.sqrt(self.D))
    if (np.any(pixelMask[:patchSize]) or np.any(pixelMask[:, :patchSize])
            or np.any(pixelMask[-patchSize+1:])
            or np.any(pixelMask[:, -patchSize+1:])):
        raise ValueError(
            'The current implementation does not support inpainting boundary pixels!'
        )
    # No partially observable patches are used for inpainting.
    self.PgnPart = dict()
    self.train_image_specific_topics(y,
                                     betas[-1],
                                     pixelMask=pixelMask,
                                     **kwargs)
    # A patch is "unseen" if it contains at least one masked pixel.
    mask_unseen = np.any(im2col(pixelMask, patchSize), axis=0)
    mask_seen = np.logical_not(mask_unseen)
    result = y.copy()
    result[pixelMask] = self.GP.r
    if y.ndim == 3:
        C = 3
    else:
        raise TypeError(
            'The current implementation only supports color-image inpainting!'
        )
    for c in xrange(C):
        print('Inpainting channel %d/%d...' % (c + 1, C))
        x, u, uPart, logPi = self.init_x_u_logPi(result[:, :, c])
        # Fully seen patches are solved once, at the last beta value.
        resp_seen, respPart = self.update_z(betas[-1],
                                            logPi,
                                            x,
                                            u,
                                            uPart,
                                            patchLst=mask_seen)
        v_seen, vPart = self.update_v(betas[-1],
                                      x,
                                      u,
                                      uPart,
                                      resp_seen,
                                      respPart,
                                      patchLst=mask_seen)
        u_seen, uPart = self.update_u(betas[-1],
                                      x,
                                      v_seen,
                                      vPart,
                                      patchLst=mask_seen)
        for i, beta in enumerate(betas):
            print('  beta value %d/%d' % (i + 1, len(betas)))
            IP = self.calcIterationParams(beta)
            for t in xrange(T):
                resp_unseen, respPart = self.update_z(beta,
                                                      logPi,
                                                      x,
                                                      u,
                                                      uPart,
                                                      patchLst=mask_unseen,
                                                      IP=IP)
                v_unseen, vPart = self.update_v(beta,
                                                x,
                                                u,
                                                uPart,
                                                resp_unseen,
                                                respPart,
                                                patchLst=mask_unseen,
                                                IP=IP)
                u_unseen, uPart = self.update_u(beta,
                                                x,
                                                v_unseen,
                                                vPart,
                                                patchLst=mask_unseen)
                logPi = self.update_pi(
                    self.get_N(np.concatenate((resp_seen, resp_unseen)),
                               respPart))
                # Scatter the seen/unseen results back into full arrays.
                NFull = len(mask_seen)
                v, u = np.zeros((NFull, D)), np.zeros(NFull)
                v[mask_seen] = v_seen
                v[mask_unseen] = v_unseen
                u[mask_seen] = u_seen
                u[mask_unseen] = u_unseen
                x = self.update_x_by_inpainting(result[:, :, c], v, u,
                                                pixelMask)
                # Was a Python 2 print statement; the call form prints the
                # same text on Python 2 and is valid on Python 3.
                print('    inner iteration %d/%d' % (t + 1, T))
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; the write-back is placed after all beta values -- confirm.
        result[:, :, c] = x
    result = ycbcr2rgb(self.clip_pixel_intensity(result))
    return result
def forward(self, x):
    """Convolution, activation, optional amplitude normalisation and pooling.

    Intermediates are stored on the instance (``x``, ``x_col``, ``W_col``,
    ``u_col``, ``u``, ``conv_y_col``, ``conv_y`` and, depending on the
    flags, the normalisation and pooling fields).

    Args:
        x: 4-D array whose shape unpacks as
            (batchsize, channel, height, width).

    Returns:
        ``self.pool_y`` when ``self.pool_or_not`` is truthy, otherwise
        ``self.conv_y``.
    """
    # Convolution
    filternum, channel, filter_h, filter_w = self.W.shape
    batchsize, channel, height, width = x.shape
    conv_out_h = 1 + int(
        (height + 2 * self.conv_pad - filter_h) / self.conv_stride)
    conv_out_w = 1 + int(
        (width + 2 * self.conv_pad - filter_w) / self.conv_stride)
    self.x = x
    self.x_col = im2col(x, filter_h, filter_w, self.conv_stride,
                        self.conv_pad)
    self.W_col = self.W.reshape(filternum, -1).T
    self.u_col = np.dot(self.x_col, self.W_col) + self.b
    self.u = self.u_col.reshape(batchsize, conv_out_h, conv_out_w,
                                -1).transpose(0, 3, 1, 2)
    self.conv_y_col = activation(self.mag * self.u_col)
    out1 = self.conv_y_col

    # The original tested self.batchnorm twice in directly nested ifs.
    if self.batchnorm:
        self.batch_size, self.out_size = self.conv_y_col.shape
        # Statistics are taken over the magnitudes; the phase is re-attached
        # at the end of this branch.
        amplitude = np.abs(self.conv_y_col)
        self.mu = amplitude.mean(axis=0).reshape((1, -1))
        self.mu = g_repmat(self.mu, self.batch_size, 1)
        self.min = amplitude.min(axis=0).reshape((1, -1))
        self.min = g_repmat(self.min, self.batch_size, 1)
        self.sigma_sq = np.mean((amplitude - self.mu)**2,
                                axis=0).reshape((1, -1))
        self.sigma_sq = g_repmat(self.sigma_sq, self.batch_size, 1)
        self.v_amp = (amplitude - self.min +
                      self.epsilon) / np.sqrt(self.sigma_sq + self.epsilon)
        out1 = self.v_amp * np.exp(1.j * np.angle(self.conv_y_col))

    self.conv_y = out1.reshape(batchsize, conv_out_h, conv_out_w,
                               -1).transpose(0, 3, 1, 2)
    out = self.conv_y

    # Pooling
    if self.pool_or_not:
        batchsize, channel, height, width = self.conv_y.shape
        pool_out_h = 1 + int(
            (height + 2 * self.pool_pad - self.pool_h) / self.pool_stride)
        pool_out_w = 1 + int(
            (width + 2 * self.pool_pad - self.pool_w) / self.pool_stride)
        self.pool_x_col = im2col(self.conv_y, self.pool_h, self.pool_w,
                                 self.pool_stride, self.pool_pad)
        self.pool_x_col = self.pool_x_col.reshape(
            -1, self.pool_h * self.pool_w)
        if self.pool == 'max':
            # Pick, per window, the entry with the largest magnitude.
            self.arg_max = np.argmax(np.abs(self.pool_x_col), axis=1)
            rows = np.arange(self.arg_max.shape[0])
            self.pool_y = np.zeros(self.arg_max.shape, dtype='complex64')
            self.pool_y[rows] = self.pool_x_col[rows, self.arg_max]
        elif self.pool == 'avg':
            self.pool_y = np.mean(self.pool_x_col, axis=1)
        self.pool_y = self.pool_y.reshape(batchsize, pool_out_h, pool_out_w,
                                          channel).transpose(0, 3, 1, 2)
        out = self.pool_y
    return out
def forward(self, x, train_flg=False):
    """Convolution block: conv, activation, optional pooling/dropout/GAP.

    Intermediates are stored on the instance (``u_col``, ``u``, ``x``,
    ``x_col``, ``conv_y``, ``y`` and, depending on the flags, the pooling
    fields).

    Args:
        x: 4-D array whose shape unpacks as (N, C, H, W).
        train_flg: forwarded to the batch-norm and dropout layers.

    Returns:
        The block output after the enabled stages.
    """
    # Convolution
    FN, C, FH, FW = self.W.shape
    N, C, H, W = x.shape
    if self.batchnorm:
        if self.BN_firstflg:
            # The batch-norm layer is created lazily on the first call, sized
            # from the first input's C * H * W.
            gamma = np.ones((C * H * W))
            beta = np.zeros((C * H * W))
            self.BN = BatchNormalization(gamma,
                                         beta,
                                         optimizer=self.optimizer)
            # NOTE(review): the return value of BN.forward is discarded here
            # and below, so the convolution runs on the un-normalised x
            # unless BN modifies x in place -- confirm intent.
            self.BN.forward(x, train_flg)
            self.BN_firstflg = False
        else:
            self.BN.forward(x, train_flg)
    out_h = 1 + int((H + 2 * self.conv_pad - FH) / self.conv_stride)
    out_w = 1 + int((W + 2 * self.conv_pad - FW) / self.conv_stride)
    col = im2col(x, FH, FW, self.conv_stride, self.conv_pad)
    col_W = self.W.reshape(FN, -1).T
    self.u_col = np.dot(col, col_W) + self.b
    self.u = self.u_col.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
    self.x = x
    self.x_col = col
    #self.W_col = col_W
    if self.activation == 'tanh':
        self.conv_y = np.tanh(self.u)
    elif self.activation == 'Relu':
        self.mask = (self.u <= 0)
        # conv_y aliases u, so the zeroing below also modifies self.u.
        self.conv_y = self.u
        self.conv_y[self.mask] = 0
    else:
        # Only a message is printed; self.conv_y is not assigned on this
        # path, so the code below uses whatever value it had before (if any).
        print('Error : Activation function ' + self.activation +
              ' is undefined.')
    if not self.pool_or_not:
        y = self.conv_y
    if self.pool_or_not:
        # Pooling
        N, C, H, W = self.conv_y.shape
        out_h = int(1 + (H - self.pool_h) / self.pool_stride)
        out_w = int(1 + (W - self.pool_w) / self.pool_stride)
        self.conv_y_col = im2col(self.conv_y, self.pool_h, self.pool_w,
                                 self.pool_stride, self.pool_pad)
        # One row per pooling window.
        self.conv_y_col = self.conv_y_col.reshape(
            -1, self.pool_h * self.pool_w)
        if self.pool == 'max':
            arg_max = np.argmax(self.conv_y_col, axis=1)
            self.pool_y = np.max(self.conv_y_col, axis=1)
            self.arg_max = arg_max
        elif self.pool == 'avg':
            self.pool_y = np.mean(self.conv_y_col, axis=1)
        self.pool_y = self.pool_y.reshape(N, out_h, out_w,
                                          C).transpose(0, 3, 1, 2)
        y = self.pool_y
    # self.y is captured before dropout / global pooling are applied.
    self.y = y
    if self.dropout:
        y = self.DO.forward(y, train_flg)
    if self.gap_layer:
        y = self.GAP.forward(y)
    return y
import numpy as np
from util import im2col

# Demo script: print the shape of im2col's output for a few inputs.

# X1 : a single sample
x1 = np.random.rand(1, 3, 7, 7)  # Data, Channel, Height, Width
print("x1:", x1.shape)
col1 = im2col(x1, 5, 5, stride=1, pad=0)  # filter(5x5)
# Expected (9, 75): OH * OW (* Data), FilterH * FilterW * Channel
print(col1.shape)

# X2 : a batch of 10 samples
x2 = np.random.rand(10, 3, 7, 7)  # Data, Channel, Height, Width
print("x2:", x2.shape)
col2 = im2col(x2, 5, 5, stride=1, pad=0)  # filter(5x5)
# Presumably (90, 75) here, not (9, 75): the first dimension is
# OH * OW * Data and Data is 10 -- confirm against util.im2col.
print(col2.shape)

# X3 : small integer input so the unrolled values can be read by eye
x3 = np.random.randint(10, size=(1, 2, 3, 3))
print("x3:", x3.shape)
print(x3)
col3 = im2col(x3, 2, 2, stride=1, pad=0)
print(col3.shape)
print(col3)
def cnn():
    """Print the shape of im2col's output for one random 3x7x7 sample.

    A 5x5 window with stride 1 and no padding is used.
    """
    sample = np.random.rand(1, 3, 7, 7)
    unrolled = im2col(sample, 5, 5, stride=1, pad=0)
    print(unrolled.shape)
self.x = None self.col = None self.col_W = None # 가중치와 편향 매개변수의 기울기 self.dW = None self.db = None def forward(self, x): FN, C, FH, FW = self.W.shape N, C, H, W = x.shape out_h = 1 + int((H + 2*self.pad - FH) / self.stride) out_w = 1 + int((W + 2*self.pad - FW) / self.stride) col = im2col(x, FH, FW, self.stride, self.pad) col_W = self.W.reshape(FN, -1).T # 내적 하기 위해, -1은 다차원 배열의 원소 수가 변환 후에도 똑같이 유지 되도록 묶어줌. out = np.dot(col, col_W) + self.b out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2) self.x = x self.col = col self.col_W = col_W return out def backward(self, dout): FN, C, FH, FW = self.W.shape dout = dout.transpose(0,2,3,1).reshape(-1, FN)