def update(self, opfunc):
    """Returns a step's parameter update given a loss/gradient evaluation function."""
    # Step size decay
    step_size = self.step_size / self.i**self.power
    self.i += self.decay

    loss, grad = opfunc(self.params)

    # Adam
    self.g1.update(grad)
    self.g2.update(grad**2)
    step = self.g1.get() / (np.sqrt(self.g2.get()) + EPS)
    saxpy(-step_size, step, self.params)

    # Iterate averaging
    self.p1.update(self.params)
    return roll2(self.p1.get(), -self.xy), loss
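# The optimizer above relies on a few small helpers that are not shown in
# this section. The sketch below gives plausible pure-NumPy stand-ins, with
# signatures inferred from the call sites (the exact definitions in the
# source may differ): `saxpy` mirrors the BLAS axpy update, and `roll2`
# rolls an array over its two trailing (spatial) axes.

import numpy as np

EPS = np.finfo(np.float32).eps  # assumed; any small constant avoids division by zero

def saxpy(a, x, y):
    """In-place y += a * x, after the BLAS axpy convention."""
    y += a * x
    return y

def roll2(arr, xy):
    """Roll an array over its last two (spatial) axes by (dy, dx)."""
    return np.roll(arr, (int(xy[0]), int(xy[1])), axis=(-2, -1))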
def inv_hv(self, p):
    """Computes the product of a vector with an approximation of the inverse Hessian."""
    p = p.copy()
    alphas = []
    # First loop of the L-BFGS two-loop recursion, over the stored
    # curvature pairs from newest to oldest
    for s, y, sy in zip(reversed(self.sk), reversed(self.yk), reversed(self.syk)):
        alphas.append(sdot(s, p) / sy)
        saxpy(-alphas[-1], y, p)

    # Scale by the initial inverse Hessian estimate gamma = (s.y) / (y.y),
    # taken from the most recent pair
    if self.sk:
        sy, y = self.syk[-1], self.yk[-1]
        p *= sy / sdot(y, y)

    # Second loop, from oldest to newest
    for s, y, sy, alpha in zip(self.sk, self.yk, self.syk, reversed(alphas)):
        beta = sdot(y, p) / sy
        saxpy(alpha - beta, s, p)

    return p
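# For context, inv_hv is the standard L-BFGS two-loop recursion. A typical
# caller turns the current gradient into a quasi-Newton descent direction;
# the wrapper below is illustrative only (the attribute names follow the
# method above, and `sdot` is an assumed flat dot-product helper).

import numpy as np

def sdot(x, y):
    """Assumed helper: dot product over flattened arrays."""
    return np.dot(x.ravel(), y.ravel())

def lbfgs_direction(optimizer, grad):
    """Quasi-Newton search direction: the negated inverse-Hessian product."""
    return -optimizer.inv_hv(grad)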
def prepare_features(self, pool, layers, tile_size=512, passes=10):
    """Averages the set of feature maps for an image over multiple passes to obscure tiling."""
    img_size = np.array(self.img.shape[-2:])
    if max(*img_size) <= tile_size:
        passes = 1
    features = {}
    for i in range(passes):
        xy = np.array((0, 0))
        if i > 0:
            xy = np.int32(np.random.uniform(size=2) * img_size) // 32
        self.roll(xy)
        self.roll_features(features, xy)
        feats = self.eval_features_once(pool, layers, tile_size)
        for layer in layers:
            if i == 0:
                features[layer] = feats[layer] / passes
            else:
                saxpy(1 / passes, feats[layer], features[layer])
        self.roll(-xy)
        self.roll_features(features, -xy)
    return features
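# The per-pass accumulation above is an incremental mean: each pass
# contributes feats[layer] / passes, so after all passes the stored
# features equal the average over the randomly rolled evaluations. A
# self-contained illustration of the same accumulation pattern
# (illustrative data, not from the source):

import numpy as np

passes = 4
acc = None
for _ in range(passes):
    sample = np.random.randn(3, 8, 8)  # stands in for one pass's feature map
    if acc is None:
        acc = sample / passes
    else:
        acc += sample / passes  # equivalent to saxpy(1 / passes, sample, acc)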
def eval_loss_and_grad(self, img, sc_grad_args):
    """Returns the summed loss and gradient."""
    old_img = self.model.img
    self.model.img = img
    lw = self.layer_weights['data']

    # Compute style+content gradient
    loss, grad = self.model.eval_sc_grad(*sc_grad_args)

    # Compute total variation gradient
    if ARGS.tv_weight:
        tv_loss, tv_grad = tv_norm(self.model.img / 127.5, beta=ARGS.tv_power)
        loss += lw * ARGS.tv_weight * tv_loss
        saxpy(lw * ARGS.tv_weight, tv_grad, grad)

    # Compute SWT norm and gradient
    if ARGS.swt_weight:
        swt_loss, swt_grad = swt_norm(self.model.img / 127.5, ARGS.swt_wavelet,
                                      ARGS.swt_levels, p=ARGS.swt_power)
        loss += lw * ARGS.swt_weight * swt_loss
        saxpy(lw * ARGS.swt_weight, swt_grad, grad)

    # Compute p-norm regularizer gradient (from jcjohnson/cnn-vis and [3])
    if ARGS.p_weight:
        p_loss, p_grad = p_norm((self.model.img + self.model.mean - 127.5) / 127.5,
                                p=ARGS.p_power)
        loss += lw * ARGS.p_weight * p_loss
        saxpy(lw * ARGS.p_weight, p_grad, grad)

    # Compute auxiliary image gradient
    if self.aux_image is not None:
        aux_grad = (self.model.img - self.aux_image) / 127.5
        loss += lw * ARGS.aux_weight * norm2(aux_grad)
        saxpy(lw * ARGS.aux_weight, aux_grad, grad)

    self.model.img = old_img
    return loss, grad
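# tv_norm, swt_norm, and p_norm are defined elsewhere in the source. As a
# point of reference, the beta = 2 special case of a forward-difference
# total variation penalty and its exact gradient can be written as below;
# this is a minimal sketch, not the source's tv_norm (which takes a
# general beta exponent).

import numpy as np

def tv2_norm(x):
    """Sum of squared forward differences and its gradient w.r.t. x."""
    dx = x[..., :, 1:] - x[..., :, :-1]
    dy = x[..., 1:, :] - x[..., :-1, :]
    loss = np.sum(dx**2) + np.sum(dy**2)
    grad = np.zeros_like(x)
    grad[..., :, 1:] += 2 * dx   # each dx term pulls its right pixel up...
    grad[..., :, :-1] -= 2 * dx  # ...and its left pixel down
    grad[..., 1:, :] += 2 * dy
    grad[..., :-1, :] -= 2 * dy
    return loss, grad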
def eval_sc_grad_tile(self, img, start, layers, content_layers, style_layers,
                      dd_layers, layer_weights, content_weight, style_weight,
                      dd_weight):
    """Evaluates an individual style+content gradient tile."""
    self.net.blobs['data'].reshape(1, 3, *img.shape[-2:])
    self.data['data'] = img
    loss = 0

    # Prepare gradient buffers and run the model forward
    for layer in layers:
        self.diff[layer] = 0
    self.net.forward(end=layers[0])
    self.data[layers[0]] = np.maximum(0, self.data[layers[0]])

    for i, layer in enumerate(layers):
        lw = layer_weights[layer]
        scale, _ = self.layer_info(layer)
        start_ = start // scale
        end = start_ + np.array(self.data[layer].shape[-2:])

        def eval_c_grad(layer, content):
            nonlocal loss
            feat = content.features[layer][:, start_[0]:end[0], start_[1]:end[1]]
            c_grad = self.data[layer] - feat
            loss += lw * content_weight[layer] * norm2(c_grad)
            saxpy(lw * content_weight[layer], normalize(c_grad), self.diff[layer])

        def eval_s_grad(layer, style):
            nonlocal loss
            current_gram = gram_matrix(self.data[layer])
            n, mh, mw = self.data[layer].shape
            feat = self.data[layer].reshape((n, mh * mw))
            gram_diff = current_gram - style.grams[layer]
            s_grad = self._arr_pool.array_like(feat)
            ssymm(gram_diff, feat, c=s_grad)
            s_grad = s_grad.reshape((n, mh, mw))
            loss += lw * style_weight[layer] * norm2(gram_diff) / len(self.styles)
            saxpy(lw * style_weight[layer] / len(self.styles), normalize(s_grad),
                  self.diff[layer])

        # Compute the content and style gradients
        if layer in content_layers:
            for content in self.contents:
                eval_c_grad(layer, content)
        if layer in style_layers:
            for style in self.styles:
                eval_s_grad(layer, style)
        if layer in dd_layers:
            loss -= lw * dd_weight[layer] * norm2(self.data[layer])
            saxpy(-lw * dd_weight[layer], normalize(self.data[layer]),
                  self.diff[layer])

        # Run the model backward
        if i + 1 == len(layers):
            self.net.backward(start=layer)
        else:
            self.net.backward(start=layer, end=layers[i + 1])

    return loss, self.diff['data']
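# The tile gradient above leans on several helpers defined elsewhere in the
# source. The pure-NumPy stand-ins below are inferred from the call sites
# and are assumptions, not the source's definitions; in particular the
# scalings of norm2, normalize, and gram_matrix are plausible choices
# rather than confirmed ones.

import numpy as np

def norm2(arr):
    """Scalar squared-error term; mean of squares is one plausible scaling."""
    return np.mean(arr**2)

def normalize(arr):
    """Gradient normalization; dividing by the mean absolute value is assumed."""
    return arr / (np.mean(np.abs(arr)) + np.finfo(np.float32).eps)

def gram_matrix(feat):
    """Gram matrix of an (n, h, w) feature map; the 1 / size scaling is assumed."""
    n, mh, mw = feat.shape
    flat = feat.reshape((n, mh * mw))
    return flat @ flat.T / flat.size

def ssymm(a, b, c):
    """Stand-in for the BLAS symmetric multiply: c = a @ b with a symmetric."""
    c[:] = a @ b
    return c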