def backprop(self, pre_grad, *args, **kwargs):
    # Mean pooling backward pass: each input inside a pool window contributed
    # 1/length to the output mean, so the incoming gradient is spread evenly
    # back over the window.
    new_h, new_w = self.outshape[-2:]
    pool_h, pool_w = self.pool_size
    length = np.prod(self.pool_size)

    layer_grads = _zero(self.inshape)
    if np.ndim(pre_grad) == 4:
        nb_batch, nb_axis, _, _ = pre_grad.shape
        for a in np.arange(nb_batch):
            for b in np.arange(nb_axis):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h1, w1 = h * pool_h, w * pool_w
                        h2, w2 = h1 + pool_h, w1 + pool_w
                        layer_grads[a, b, h1:h2, w1:w2] = pre_grad[a, b, h, w] / length
    elif np.ndim(pre_grad) == 3:
        nb_batch, _, _ = pre_grad.shape
        for a in np.arange(nb_batch):
            for h in np.arange(new_h):
                for w in np.arange(new_w):
                    h_shift, w_shift = h * pool_h, w * pool_w
                    layer_grads[a, h_shift:h_shift + pool_h, w_shift:w_shift + pool_w] = \
                        pre_grad[a, h, w] / length
    else:
        raise ValueError('pre_grad must be a 3-D or 4-D array, '
                         'got %d dimensions' % np.ndim(pre_grad))

    return layer_grads
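# Sanity sketch (not part of the layer): for the 4-D branch above, spreading
# each output gradient evenly over its pool window is equivalent to a Kronecker
# upsample followed by division by the window size. Shapes here are illustrative.
import numpy as np

pre_grad = np.ones((1, 1, 2, 2))
pool_h = pool_w = 2
length = pool_h * pool_w

# np.kron broadcasts each pre_grad entry over a pool_h x pool_w block
upsampled = np.kron(pre_grad, np.ones((pool_h, pool_w))) / length
assert upsampled.shape == (1, 1, 4, 4)
assert np.allclose(upsampled, 0.25)   # each input receives grad / length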
def call(self, X, *args, **kwargs):
    self.inshape = X.shape
    pool_h, pool_w = self.pool_size
    new_h, new_w = self.outshape[-2:]

    # forward pass: average over non-overlapping pool_h x pool_w windows.
    # The window origin must advance by the pool size (h * pool_h, w * pool_w)
    # so that the windows match the backward pass above.
    outputs = _zero(self.inshape[:-2] + self.outshape[-2:])

    if np.ndim(X) == 4:
        nb_batch, nb_axis, _, _ = X.shape
        for a in np.arange(nb_batch):
            for b in np.arange(nb_axis):
                for h in np.arange(new_h):
                    for w in np.arange(new_w):
                        h1, w1 = h * pool_h, w * pool_w
                        outputs[a, b, h, w] = np.mean(X[a, b, h1:h1 + pool_h, w1:w1 + pool_w])
    elif np.ndim(X) == 3:
        nb_batch, _, _ = X.shape
        for a in np.arange(nb_batch):
            for h in np.arange(new_h):
                for w in np.arange(new_w):
                    h1, w1 = h * pool_h, w * pool_w
                    outputs[a, h, w] = np.mean(X[a, h1:h1 + pool_h, w1:w1 + pool_w])
    else:
        raise ValueError('X must be a 3-D or 4-D array, '
                         'got %d dimensions' % np.ndim(X))

    return outputs
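# Reference sketch (not the library's API): a vectorized version of the same
# non-overlapping mean pooling, assuming the spatial dims divide evenly by the
# pool size. Names and shapes below are illustrative.
import numpy as np

def mean_pool_forward(X, pool_size):
    pool_h, pool_w = pool_size
    *lead, H, W = X.shape
    # split each spatial axis into (n_windows, window_size), then average
    # over the two window-size axes
    Xr = X.reshape(*lead, H // pool_h, pool_h, W // pool_w, pool_w)
    return Xr.mean(axis=(-3, -1))

X = np.arange(2 * 3 * 4 * 4, dtype=float).reshape(2, 3, 4, 4)
assert mean_pool_forward(X, (2, 2)).shape == (2, 3, 2, 2)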
def update(self, params, grads):
    # init cache (one accumulator per parameter tensor)
    if self.cache is None:
        self.cache = [_zero(p.shape) for p in params]

    # update parameters in place; the cache holds a running average
    # of squared gradients
    for i, (c, p, g) in enumerate(zip(self.cache, params, grads)):
        c = self.rho * c + (1 - self.rho) * np.power(g, 2)
        p -= self.lr * g / np.sqrt(c + self.epsilon)
        self.cache[i] = c
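# Usage sketch: one RMSprop step on a single parameter tensor. The
# SimpleNamespace stand-in for the optimizer object is illustrative, not the
# library's constructor; the lr/rho/epsilon values are typical defaults.
import numpy as np
from types import SimpleNamespace

opt = SimpleNamespace(lr=0.01, rho=0.9, epsilon=1e-6, cache=None)
w = np.array([1.0, -2.0])          # parameter (updated in place, like p above)
g = np.array([0.5, 0.5])           # its gradient

opt.cache = [np.zeros_like(w)]
c = opt.rho * opt.cache[0] + (1 - opt.rho) * g ** 2   # running avg of g^2
w -= opt.lr * g / np.sqrt(c + opt.epsilon)            # scaled step
opt.cache[0] = c
# Note: `p -= ...` in the loop above mutates the ndarray in place, which is
# what propagates the update back into the network; rebinding p would not.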
def connect(self, prev_layer=None):
    if prev_layer:
        if len(prev_layer.outshape) != 2:
            raise ValueError("Previous layer's outshape is incompatible")
        self.inshape = prev_layer.outshape[-1]
    elif not self.inshape:
        raise ValueError('inshape must be given to the first layer of the network')

    self.shape = self.inshape, self.outshape[-1]
    self.w = self.init(self.shape)
    self.b = _zero((self.outshape[-1],))
    self.dw = None
    self.db = None
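# Shape sketch for the wiring above: if the previous layer reports outshape
# (batch, 128) and this layer's outshape ends in 64, connect() builds a 128x64
# weight matrix and a length-64 bias. The glorot-style initializer here is an
# illustrative stand-in for self.init, not the library's initializer.
import numpy as np

def glorot_uniform(shape):
    limit = np.sqrt(6.0 / sum(shape))
    return np.random.uniform(-limit, limit, shape)

n_in, n_out = 128, 64              # prev_layer.outshape[-1], self.outshape[-1]
w = glorot_uniform((n_in, n_out))
b = np.zeros((n_out,))
assert w.shape == (128, 64) and b.shape == (64,)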
def backprop(self, grad, *args, **kwargs):
    batch_size, depth, input_h, input_w = self.last_input.shape
    out_h, out_w = self.outshape[2:]
    kernel_h, kernel_w = self.kernel_size

    # gradients
    self.dw = _zero(self.w.shape)
    self.db = _zero(self.b.shape)
    delta = grad * self.activation.derivative()

    # dw: for each kernel tap (h, w), the strided slice picks out every input
    # pixel that tap touched across the whole feature map, so the window has
    # the same (batch, out_h, out_w) shape as delta[:, r]
    for r in np.arange(self.nb_kernel):
        for t in np.arange(depth):
            for h in np.arange(kernel_h):
                for w in np.arange(kernel_w):
                    input_window = self.last_input[:, t,
                                                   h:input_h - kernel_h + h + 1:self.stride,
                                                   w:input_w - kernel_w + w + 1:self.stride]
                    delta_window = delta[:, r]
                    self.dw[r, t, h, w] = np.sum(input_window * delta_window) / batch_size

    # db: each kernel's bias touches every output pixel of its feature map
    for r in np.arange(self.nb_kernel):
        self.db[r] = np.sum(delta[:, r]) / batch_size

    # dX: scatter each output delta back over the input window it came from
    if not self.first_layer:
        layer_grads = _zero(self.last_input.shape)
        for b in np.arange(batch_size):
            for r in np.arange(self.nb_kernel):
                for t in np.arange(depth):
                    for h in np.arange(out_h):
                        for w in np.arange(out_w):
                            h1, w1 = h * self.stride, w * self.stride
                            h2, w2 = h1 + kernel_h, w1 + kernel_w
                            layer_grads[b, t, h1:h2, w1:w2] += self.w[r, t] * delta[b, r, h, w]
        return layer_grads
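# Numeric check of the strided-slice trick in the dw loop above, for stride 1,
# one batch element, one kernel, one channel. The rng seed and shapes are
# illustrative.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((1, 1, 5, 5))       # (batch, depth, H, W)
delta = rng.standard_normal((1, 1, 3, 3))   # (batch, kernels, out_h, out_w)
kh = kw = 3
H, W = X.shape[2:]

dw = np.zeros((kh, kw))
for h in range(kh):
    for w in range(kw):
        # same window as the layer: every input pixel kernel tap (h, w) touched
        dw[h, w] = np.sum(X[:, 0, h:H - kh + h + 1, w:W - kw + w + 1] * delta[:, 0])

# direct definition of the weight gradient, for comparison
dw_ref = np.zeros((kh, kw))
for h in range(kh):
    for w in range(kw):
        for i in range(3):
            for j in range(3):
                dw_ref[h, w] += X[0, 0, i + h, j + w] * delta[0, 0, i, j]

assert np.allclose(dw, dw_ref)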
def connect(self, prev_layer=None):
    if prev_layer:
        self.inshape = prev_layer.outshape
    elif not self.inshape:
        raise ValueError('inshape must be given to the first layer of the network')

    prev_nb_kernel = self.inshape[1]
    kernel_h, kernel_w = self.kernel_size
    self.w = self.init((self.nb_kernel, prev_nb_kernel, kernel_h, kernel_w))
    self.b = _zero((self.nb_kernel,))

    # valid convolution: output size = (input - kernel) // stride + 1
    prev_h, prev_w = self.inshape[2], self.inshape[3]
    height = (prev_h - kernel_h) // self.stride + 1
    width = (prev_w - kernel_w) // self.stride + 1
    self.outshape = (self.inshape[0], self.nb_kernel, height, width)
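# Worked example of the output-shape rule above (numbers are illustrative):
# a 32x32 input convolved with 5x5 kernels at stride 1 gives 28x28 maps.
prev_h, prev_w, kernel_h, kernel_w, stride = 32, 32, 5, 5, 1
height = (prev_h - kernel_h) // stride + 1
width = (prev_w - kernel_w) // stride + 1
assert (height, width) == (28, 28)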
def call(self, X, *args, **kwargs):
    self.last_input = X
    batch_size, depth, height, width = X.shape
    kernel_h, kernel_w = self.kernel_size
    out_h, out_w = self.outshape[2:]

    # valid cross-correlation: slide each kernel over the input with the
    # configured stride, summing over all input channels at once
    outputs = _zero((batch_size, self.nb_kernel, out_h, out_w))
    for x in np.arange(batch_size):
        for y in np.arange(self.nb_kernel):
            for h in np.arange(out_h):
                for w in np.arange(out_w):
                    h1, w1 = h * self.stride, w * self.stride
                    h2, w2 = h1 + kernel_h, w1 + kernel_w
                    patch = X[x, :, h1:h2, w1:w2]
                    conv_product = patch * self.w[y]
                    outputs[x, y, h, w] = np.sum(conv_product) + self.b[y]

    self.last_output = self.activation.call(outputs)
    return self.last_output
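# Sanity sketch: the inner loop above computes a *valid cross-correlation*
# (no kernel flip), as is conventional for conv layers. Assuming SciPy is
# available, one channel at stride 1 can be checked against correlate2d.
import numpy as np
from scipy.signal import correlate2d

rng = np.random.default_rng(1)
x = rng.standard_normal((6, 6))
k = rng.standard_normal((3, 3))

out = np.zeros((4, 4))
for h in range(4):
    for w in range(4):
        out[h, w] = np.sum(x[h:h + 3, w:w + 3] * k)

assert np.allclose(out, correlate2d(x, k, mode='valid'))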