def test_unconstrained(self):
    """Minimize the Rosenbrock function without constraints.

    The global minimum is the all-ones vector; the optimizer should
    reach it from the origin.
    """

    def objective(x):
        """Rosenbrock function. (Carl Edward Rasmussen, 2001-07-21).

        f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2

        Args:
          x: a Variable
        Returns:
          f: a tensor (objective value)
        """
        d = array_ops.size(x)
        # Name the two shifted views of x once instead of slicing repeatedly.
        tail = array_ops.strided_slice(x, [1], [d])        # x[1:]
        head = array_ops.strided_slice(x, [0], [d - 1])    # x[:-1]
        curvature = 100 * math_ops.square(
            math_ops.subtract(tail, math_ops.square(head)))
        offset = math_ops.square(math_ops.subtract(1.0, head))
        return math_ops.reduce_sum(math_ops.add(curvature, offset))

    dimension = 5
    x = variables.Variable(array_ops.zeros(dimension))
    optimizer = external_optimizer.ScipyOptimizerInterface(objective(x))

    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        optimizer.minimize(sess)
        self.assertAllClose(np.ones(dimension), sess.run(x))
def run_optimization(self, total_iterations=1000, checkpoint_iterations=100, result_path='result'):
    """Run the style-transfer optimization loop via SciPy L-BFGS.

    Builds a ScipyOptimizerInterface over ``self._total_loss`` (optimizing
    only the stylized image variable), initializes that variable, and runs
    the external optimizer, reporting losses through ``self._output_results``.

    Args:
      total_iterations: maximum number of optimizer iterations
        (passed to SciPy as ``maxiter``).
      checkpoint_iterations: stored on ``self``; presumably consumed by
        ``self._output_results`` to decide when to checkpoint — verify
        against that callback's implementation.
      result_path: directory/prefix for results, forwarded to
        ``self._setup_result_path``.
    """
    self._setup_result_path(result_path)
    # Iteration counter is reset here and (presumably) advanced inside the
    # loss callback — confirm in self._output_results.
    self.iters = 0
    self.checkpoint_iterations = checkpoint_iterations
    optimizer = external_optimizer.ScipyOptimizerInterface(
        self._total_loss,
        var_list=[self._stylized_image],
        options={'maxiter': total_iterations})
    # Only the stylized image needs initialization; other graph variables
    # are assumed to be ready already.
    self._sess.run(self._stylized_image.initializer)
    print('Started optimization\n')
    self._start_time = time.time()
    # fetches are handed to the loss callback on every objective evaluation;
    # note the dict ``.values()`` entries are passed as view objects.
    optimizer.minimize(
        session=self._sess,
        loss_callback=self._output_results,
        fetches=[self._total_loss, self._content_loss, self._style_loss,
                 self._tv_loss,
                 self._wtd_layerwise_content_losses_dict.values(),
                 self._wtd_layerwise_style_losses_dict.values(),
                 self._stylized_image])
def test_unconstrained(self):
    """Unconstrained minimization of the shared Rosenbrock objective.

    The minimizer of the Rosenbrock function is the all-ones vector.
    """
    dim = 5
    point = variables.Variable(array_ops.zeros(dim))
    opt = external_optimizer.ScipyOptimizerInterface(self._objective(point))
    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        opt.minimize(sess)
        self.assertAllClose(np.ones(dim), sess.run(point))
def test_vector_bounds(self):
    """Per-component bounds: only the second entry has a lower bound.

    Minimizing ||v||^2 drives the free component to 0 while the bounded
    one stops at its lower bound of 2.
    """
    start = [7., 7.]
    vector = variables.Variable(start, 'vector')
    # Make norm as small as possible.
    loss = math_ops.reduce_sum(math_ops.square(vector))
    bounds = {vector: ([None, 2.], None)}  # lower bounds per entry, no upper
    opt = external_optimizer.ScipyOptimizerInterface(
        loss, var_to_bounds=bounds)
    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        opt.minimize(sess)
        self.assertAllClose([0., 2.], sess.run(vector))
def test_optimizer_kwargs(self):
    """Regression test: the 'method' kwarg must still be present in
    optimizer_kwargs after optimizer.minimize() runs.

    Bug reference: b/64065260
    """
    vector = variables.VariableV1([7., 7.], 'vector')
    loss = math_ops.reduce_sum(math_ops.square(vector))
    opt = external_optimizer.ScipyOptimizerInterface(loss, method='SLSQP')
    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())
        opt.minimize(sess)
        self.assertEqual('SLSQP', opt.optimizer_kwargs.get('method'))
def test_scalar_bounds(self):
    """Scalar bounds broadcast over the whole variable.

    With a lower bound of 1 on every component, minimizing ||v||^2
    yields the all-ones vector.
    """
    vector_initial_value = [7., 7.]
    vector = variables.Variable(vector_initial_value, 'vector')

    # Make norm as small as possible.
    loss = math_ops.reduce_sum(math_ops.square(vector))

    # Make the minimum value of each component be 1.
    # np.inf rather than np.infty: the `infty` alias was removed in
    # NumPy 2.0; the value is identical.
    var_to_bounds = {vector: (1., np.inf)}

    optimizer = external_optimizer.ScipyOptimizerInterface(
        loss, var_to_bounds=var_to_bounds)

    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        optimizer.minimize(sess)
        self.assertAllClose(np.ones(2), sess.run(vector))
def _test_optimization_method(self,
                              method,
                              options,
                              rtol=1e-5,
                              atol=1e-5,
                              dimension=5):
    """Run one SciPy method/options combination on the Rosenbrock objective.

    Checks convergence to the all-ones minimizer within the given
    tolerances.
    """
    point = variables.Variable(array_ops.zeros(dimension))
    opt = external_optimizer.ScipyOptimizerInterface(
        self._objective(point), method=method, options=options)
    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        opt.minimize(sess)
        self.assertAllClose(
            np.ones(dimension), sess.run(point), rtol=rtol, atol=atol)
def test_nonlinear_programming(self):
    """SLSQP with one equality and one inequality constraint.

    Minimizing ||v||^2 subject to v[1] == 1 and v[0] >= 1 puts the
    optimum at (1, 1).
    """
    vector = variables.Variable([7., 7.], 'vector')
    # Make norm as small as possible.
    loss = math_ops.reduce_sum(math_ops.square(vector))
    eq_constraints = [vector[1] - 1.]    # Ensure y = 1.
    ineq_constraints = [vector[0] - 1.]  # Ensure x >= 1.
    opt = external_optimizer.ScipyOptimizerInterface(
        loss,
        equalities=eq_constraints,
        inequalities=ineq_constraints,
        method='SLSQP')
    with self.test_session() as sess:
        sess.run(variables.global_variables_initializer())
        opt.minimize(sess)
        self.assertAllClose(np.ones(2), sess.run(vector))
def test_callbacks(self):
    """loss_callback must observe the first and last loss values, and
    step_callback must receive the final position (the quadratic's
    minimum location)."""
    start_val = np.array([7., -2.], dtype=np.float32)
    vector = variables.VariableV1(start_val, 'vector')

    target_val = np.arange(2)
    target = constant_op.constant(target_val, dtype=dtypes.float32)

    # Simple quadratic bowl centered at `target`.
    loss = math_ops.reduce_sum(math_ops.square(vector - target)) / 2.
    # Loss at the starting point, computed independently in NumPy.
    first_loss_val = ((start_val - target_val)**2).sum() / 2.

    opt = external_optimizer.ScipyOptimizerInterface(loss, method='SLSQP')

    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())
        initial_vector_val = sess.run(vector)

        loss_cb = test.mock.Mock()
        step_cb = test.mock.Mock()
        opt.minimize(
            sess,
            fetches=[loss],
            loss_callback=loss_cb,
            step_callback=step_cb)

        final_loss_val = sess.run(loss)
        expected_calls = [
            test.mock.call(first_loss_val),
            test.mock.call(final_loss_val),
        ]
        loss_cb.assert_has_calls(expected_calls, any_order=True)

        # The last step_callback invocation carries the final position.
        args, _ = step_cb.call_args
        self.assertAllClose(target_val, args[0])
def stylize(network_file, network_type, initial, initial_noiseblend, content,
            styles, preserve_colors_coeff, preserve_colors_prior, iterations,
            content_weight, content_weight_blend, style_weight,
            style_distr_weight, style_layer_weight_exp, style_blend_weights,
            style_feat_type, tv_weight, learning_rate, beta1, beta2, epsilon,
            ashift, pooling, optimizer, print_iterations=None,
            checkpoint_iterations=None):
    """
    Stylize images.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration). Other tuples are
    yielded every `checkpoint_iterations` iterations.

    :rtype: iterator[tuple[int|None,image]]
    """
    activation_shift = ashift
    # A nonzero distribution weight turns on the extra per-layer
    # mean/std matching term in the style loss.
    distribution_loss = (style_distr_weight != 0.0)
    # Batch dimension of 1 is prepended; assumes `content`/`styles` are
    # HxWxC numpy arrays — TODO confirm against callers.
    shape = (1, ) + content.shape
    style_shapes = [(1, ) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]
    style_distr = [{} for _ in styles]

    # Optionally re-color each style image toward the content's palette
    # before feature extraction (saves a debug copy of each result).
    if preserve_colors_prior == True:
        style_cnt = 0
        for i in range(len(styles)):
            styles[i] = colortransfer(content, styles[i], 'hsv')
            Image.fromarray(styles[i]).save(
                "precolor_style%d.jpg" % (style_cnt), quality=95)
            style_cnt += 1

    # Select the backbone module ('sqz' = SqueezeNet, anything else = VGG).
    if network_type == 'sqz':
        net_module = sqz
    else:
        net_module = vgg

    # Style-feature representation selector.
    STYLE_FEATURE_TYPES_GRAM = 0
    STYLE_FEATURE_TYPES_MEAN = 1
    STYLE_FEATURE_TYPES_DISTR = 2
    style_features_type = STYLE_FEATURE_TYPES_GRAM
    if style_feat_type == 'mean':
        style_features_type = STYLE_FEATURE_TYPES_MEAN
    elif style_feat_type == 'distr':
        style_features_type = STYLE_FEATURE_TYPES_DISTR

    CONTENT_LAYERS = net_module.get_content_layers()
    STYLE_LAYERS = net_module.get_style_layers()
    vgg_weights, vgg_mean_pixel = net_module.load_net(network_file)
    if vgg_weights == None:
        print("Failed to load network\n\n")
        sys.exit(0)

    # calculate content layer weights
    # The blend coefficient is converted to a geometric decay ratio
    # applied across successive content layers.
    clw = (content_weight_blend, 1.0 - content_weight_blend)
    content_layer_weight_exp = clw[1] / clw[0]
    layer_weight = 1.0
    content_layers_weights = {}
    for content_layer in CONTENT_LAYERS:
        content_layers_weights[content_layer] = layer_weight
        layer_weight *= content_layer_weight_exp

    # normalize content layer weights
    layer_weights_sum = 0
    for content_layer in CONTENT_LAYERS:
        layer_weights_sum += content_layers_weights[content_layer]
    for content_layer in CONTENT_LAYERS:
        content_layers_weights[content_layer] /= layer_weights_sum

    # calculate style layer weights (geometric progression with the
    # user-supplied exponent)
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder(common.get_dtype_tf(), shape=shape)
        net = net_module.net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array(
            [net_module.preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(
                feed_dict={image: content_pre})

    # compute style features in feedforward mode
    # A fresh graph per style image, since style shapes may differ.
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder(
                common.get_dtype_tf(), shape=style_shapes[i])
            net = net_module.net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array(
                [net_module.preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                # Flatten spatial dims: rows are pixels, cols are channels.
                features = np.reshape(features, (-1, features.shape[3]))
                if distribution_loss:
                    style_distr[i][layer] = (np.mean(features, axis=0),
                                             np.std(features, axis=0))
                if style_features_type == STYLE_FEATURE_TYPES_GRAM:
                    # Gram matrix
                    # activation shift
                    features += np.full(
                        features.shape,
                        common.get_dtype_np()(activation_shift))
                    gram = np.matmul(features.T, features) / features.size
                    style_features[i][layer] = gram
                elif style_features_type == STYLE_FEATURE_TYPES_MEAN:
                    # Averaging
                    style_features[i][layer] = np.mean(
                        features, axis=0)  # / features.size
                elif style_features_type == STYLE_FEATURE_TYPES_DISTR:
                    # Full distribution parameters loss
                    style_features[i][layer] = (np.mean(features, axis=0),
                                                np.std(features, axis=0))

    initial_content_noise_coeff = 1.0 - initial_noiseblend
    NOISE_AMP = 64.0 * 0.256

    # make stylized image using backpropogation
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        if initial is None:
            # NOTE(review): `noise` is computed but never used below.
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            initial = tf.random_normal(shape) * NOISE_AMP
        else:
            initial = np.array(
                [net_module.preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype(common.get_dtype_np())
            noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
            # Blend the provided initial image with random noise according
            # to initial_noiseblend.
            initial = (initial) * initial_content_noise_coeff + (
                tf.random_normal(shape) *
                NOISE_AMP) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = net_module.net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(
                content_layers_weights[content_layer] * content_weight *
                (2 * tf.nn.l2_loss(net[content_layer] -
                                   content_features[content_layer]) /
                 content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            style_distr_loss = 0
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = map(lambda i: i.value,
                                               layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                if distribution_loss:
                    # Penalize deviation of current activations from the
                    # target per-channel mean/std (features renormalized
                    # toward the target distribution).
                    print("Style Layer: %s" % (style_layer))
                    EPS = 1e-5
                    style_target_distr = style_distr[i][style_layer]
                    cur_mean, cur_var = tf.nn.moments(feats, axes=[0])
                    cur_distr = (cur_mean,
                                 tf.sqrt(
                                     tf.maximum(cur_var, tf.fill([1], EPS))))
                    feats_shape = feats.get_shape()
                    # NOTE(review): `distr_losses` is never appended to or
                    # read — dead variable.
                    distr_losses = []
                    feats_delta = feats - tf.add(
                        tf.multiply(
                            tf.div(
                                style_target_distr[1], cur_distr[1] +
                                tf.fill(cur_distr[1].get_shape(), EPS)),
                            tf.subtract(feats, cur_distr[0])),
                        style_target_distr[0])
                    style_distr_loss += style_distr_weight * tf.nn.l2_loss(
                        feats_delta) / tf.cast(
                            feats_shape[0] * feats_shape[1],
                            common.get_dtype_tf())
                if style_features_type == STYLE_FEATURE_TYPES_GRAM:
                    # Gram matrix
                    # activation shift
                    feats += tf.fill(
                        feats.get_shape(),
                        tf.cast(activation_shift, common.get_dtype_tf()))
                    gram = tf.matmul(tf.transpose(feats), feats) / size
                    style_gram = style_features[i][style_layer]
                    style_losses.append(style_layers_weights[style_layer] * 2 *
                                        tf.nn.l2_loss(gram - style_gram) /
                                        style_gram.size)
                elif style_features_type == STYLE_FEATURE_TYPES_MEAN:
                    # Averaging
                    style_target_features = style_features[i][style_layer]
                    style_current_features = tf.reduce_mean(feats, axis=0)
                    style_losses.append(style_layers_weights[style_layer] * 2 *
                                        tf.nn.l2_loss(style_current_features -
                                                      style_target_features))
                elif style_features_type == STYLE_FEATURE_TYPES_DISTR:
                    # Full distribution parameters loss
                    style_target_features = style_features[i][style_layer]
                    cur_mean, cur_var = tf.nn.moments(feats, axes=[0])
                    EPS = 1e-5
                    style_current_features = (cur_mean,
                                              tf.sqrt(
                                                  tf.maximum(
                                                      cur_var,
                                                      tf.fill([1], EPS))))
                    style_losses.append(
                        style_layers_weights[style_layer] * 2 *
                        (tf.nn.l2_loss(style_current_features[0] -
                                       style_target_features[0]) +
                         tf.nn.l2_loss(style_current_features[1] -
                                       style_target_features[1])))
            style_loss += style_weight * style_blend_weights[i] * reduce(
                tf.add, style_losses)
            if distribution_loss:
                style_loss += style_distr_loss

        # total variation denoising
        tv_y_size = _tensor_size(image[:, 1:, :, :])
        tv_x_size = _tensor_size(image[:, :, 1:, :])
        tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:, 1:, :, :] -
                           image[:, :shape[1] - 1, :, :]) / tv_y_size) +
            (tf.nn.l2_loss(image[:, :, 1:, :] -
                           image[:, :, :shape[2] - 1, :]) / tv_x_size))

        # overall loss
        loss = tf.cast(content_loss, common.get_dtype_tf()) + tf.cast(
            style_loss, common.get_dtype_tf()) + tf.cast(
                tv_loss, common.get_dtype_tf())

        # optimizer setup
        def iter_callback(data=None):
            # Progress reporter; state is kept in function attributes.
            iter_callback.opt_iter += 1
            stderr.write('Iteration %4d/%4d (%f)\n' %
                         (iter_callback.opt_iter, iter_callback.max_iter,
                          time.time() - iter_callback.time0))
            iter_callback.time0 = time.time()

        iter_callback.opt_iter = 0
        iter_callback.max_iter = iterations
        print("")
        use_scipy_optimizer = False
        if optimizer == 'adam':
            print("Using TF Adam optimizer (%f)" % (learning_rate))
            # Sometimes Adam can benefit from the learning rate decay too,
            # it can be visually noted when due to too large remaining
            # learning rate, at higher iterations picture could be
            # transformed to a worse state
            if False:  # decay path intentionally disabled
                LEARNING_RATE_INITIAL = learning_rate
                LEARNING_DECAY_STEPS = 10
                LEARNING_DECAY_BASE = 0.98
                learning_rate_decay = tf.train.exponential_decay(
                    LEARNING_RATE_INITIAL,
                    global_step,
                    LEARNING_DECAY_STEPS,
                    LEARNING_DECAY_BASE,
                    staircase=True)
                train_step = tf.train.AdamOptimizer(
                    learning_rate_decay, beta1, beta2,
                    epsilon).minimize(loss, global_step=global_step)
            else:
                train_step = tf.train.AdamOptimizer(
                    learning_rate, beta1, beta2, epsilon).minimize(loss)
        else:
            # setting up L-BFGS iteration parameters
            # there are two iteration kinds: outer iteration (our loop) and
            # inner iteration, or subiteration (the SciPy optimizer loop)
            # ideally, we don't want to split the internal subiteration
            # loop, as this leads to loss of precision
            # hence in default case of L-BFGS we would want to have 1 our
            # iteration and `iterations` subiterations
            subiterations = iterations
            if checkpoint_iterations:
                stderr.write(
                    'WARNING: checkpoint_iterations is used with L-BFGS optimizer - this will decrease the precision\n'
                )
                stderr.write(
                    ' due to the need to split iteration seuqence to save intermediate image and show progress\n'
                )
                if checkpoint_iterations < 50:
                    stderr.write(
                        'WARNING: checkpoint_iterations cannot be lower than 50 when using L-BFGS due to precision losses\n'
                    )
                    checkpoint_iterations = 50
                # we don't want to break up iteration sequence just for the
                # statistics output
                if print_iterations:
                    stderr.write(
                        'WARNING: both checkpoint_iterations and print_iterations are set for L-BFGS, focring print_iterations=checkpoint_iterations\n'
                    )
                # NOTE(review): original indentation lost; this assignment
                # is taken as unconditional within the checkpoint branch,
                # matching the comment above — confirm intent.
                print_iterations = checkpoint_iterations
                if subiterations > checkpoint_iterations:
                    subiterations = checkpoint_iterations
            elif print_iterations:
                stderr.write(
                    'WARNING: print_iterations is used with L-BFGS optimizer - this will decrease the precision\n'
                )
                stderr.write(
                    ' due to the need to split iteration seuqence to save intermediate image and show progress\n'
                )
                if print_iterations < 50:
                    stderr.write(
                        'WARNING: print_iterations cannot be lower than 50 when using L-BFGS due to precision losses\n'
                    )
                    print_iterations = 50
                if subiterations > print_iterations:
                    subiterations = print_iterations
            if subiterations != iterations and subiterations < iterations:
                # subiterations number is limited, we need to get the total
                # amount of L-BFGS subeterations as close to `iterations`
                # as possible
                iterations = iterations // subiterations + 1
                print_iterations = 1
                checkpoint_iterations = 1
            else:
                # subiterations number is unlimited, we only need one
                # training iteration
                iterations = 1
            iter_callback.max_iter = iterations * subiterations
            use_scipy_optimizer = True
            if optimizer == 'cg':
                print("Using SciPy CG optimizer")
                scipy_optimizer = external_optimizer.ScipyOptimizerInterface(
                    loss,
                    method='CG',
                    callback=iter_callback,
                    options={
                        'disp': None,
                        'gtol': 1e-05,
                        'eps': 1e-08,
                        'maxiter': subiterations,
                    })
            else:
                print("Using SciPy L-BFGS optimizer")
                scipy_optimizer = external_optimizer.ScipyOptimizerInterface(
                    loss,
                    method='L-BFGS-B',
                    callback=iter_callback,
                    options={
                        'disp': None,
                        'maxls': 20,
                        'iprint': -1,
                        'gtol': 1e-05,
                        'eps': 1e-08,
                        'maxiter': subiterations,
                        'ftol': 2.220446049250313e-09,
                        'maxcor': 10,
                        'maxfun': 15000
                    })

        def print_progress():
            # Evaluates each loss tensor in the current default session.
            stderr.write(' content loss: %g\n' % content_loss.eval())
            stderr.write(' style loss: %g\n' % style_loss.eval())
            stderr.write(' tv loss: %g\n' % tv_loss.eval())
            stderr.write(' total loss: %g\n' % loss.eval())

        opt_time = time.time()

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started..\n')
            print_progress()
            for i in range(iterations):
                iter_callback.time0 = time.time()
                if use_scipy_optimizer == 0:
                    # Adam path: one TF step per outer iteration.
                    train_step.run()
                    iter_callback()
                else:
                    # SciPy path: a full block of subiterations per outer
                    # iteration; iter_callback is invoked by the optimizer.
                    scipy_optimizer.minimize(sess)
                last_step = (i == iterations - 1)
                if last_step or (print_iterations and
                                 i % print_iterations == 0):
                    print_progress()
                if (checkpoint_iterations and
                        i % checkpoint_iterations == 0) or last_step:
                    # Track the best image seen so far and yield a
                    # checkpoint (or the final result on the last step).
                    this_loss = loss.eval()
                    if this_loss < best_loss:
                        best_loss = this_loss
                        best = image.eval()
                    img_out = net_module.unprocess(
                        best.reshape(shape[1:]), vgg_mean_pixel)
                    if preserve_colors_coeff and preserve_colors_coeff != 0.0:
                        # Transfer the content's colors onto the stylized
                        # output, blended by preserve_colors_coeff.
                        img_out_pc = colortransfer(
                            original_image=np.clip(content, 0, 255),
                            styled_image=np.clip(img_out, 0, 255),
                            mode='yuv')
                        if preserve_colors_coeff == 1.0:
                            img_out = img_out_pc
                        else:
                            img_out = img_out_pc * preserve_colors_coeff + img_out * (
                                1.0 - preserve_colors_coeff)
                    yield ((None if last_step else i), img_out)
        print("Optimization time: %fs" % (time.time() - opt_time))